Source code for lammps_interface.structure_data

#/usr/bin/env python
"""
Molecular graph and structure io methods.
"""
from datetime import date
import numpy as np
from scipy.spatial import distance
import math
import shlex
import re
from .CIFIO import CIF, get_time
from .atomic import METALS, MASS, COVALENT_RADII
from copy import copy
from .mof_sbus import InorganicCluster, OrganicCluster
from copy import deepcopy
import itertools
import os
import sys
from .generic_raspa import GENERIC_PSEUDO_ATOMS_HEADER, GENERIC_PSEUDO_ATOMS
from .generic_raspa import GENERIC_FF_MIXING_HEADER, GENERIC_FF_MIXING
from .generic_raspa import GENERIC_FF_MIXING_FOOTER
from .uff import UFF_DATA
import networkx as nx
import operator

try:
    import networkx as nx
    from networkx.algorithms import approximation

except ImportError:
    print("Warning: could not load networkx module, this is needed to produce the lammps data file.")
    sys.exit()
from collections import OrderedDict
from .atomic import MASS, ATOMIC_NUMBER, COVALENT_RADII
from .atomic import organic, non_metals, noble_gases, metalloids, lanthanides, actinides, transition_metals
from .atomic import alkali, alkaline_earth, main_group, metals
from .ccdc import CCDC_BOND_ORDERS


DEG2RAD = np.pi / 180.


[docs]class MolecularGraph(nx.Graph):
    """Contains all information relating a structure file to a fully described classical system.

    Important specific arguments for atomic nodes:
      - mass
      - force_field_type
      - charge
      - cartesian_coordinates
      - description {contains all information about electronic environment
        to make a decision on the final force_field_type}
      - hybridization [sp3, sp2, sp, aromatic]

    Important arguments for bond edges:
      - weight = 1
      - length
      - image_flag
      - force_field_type
    """
    node_dict_factory = OrderedDict
[docs]    def __init__(self, **kwargs):
        """MolecularGraph constructor."""
        nx.Graph.__init__(self, **kwargs)
        # coordinates and distances will be kept in a matrix because
        # networkx edge and node lookup is slow.
        try:
            self.name = kwargs['name']
        except KeyError:
            self.name = 'default'
        self.coordinates = None
        self.distance_matrix = None
        self.original_size = 0
        self.molecule_id = 444
        self.inorganic_sbus = {}
        self.find_metal_sbus = False
        self.organic_sbus = {}
        self.find_organic_sbus = False
        self.cell = None
        self.rigid = False
        #TODO(pboyd): networkx edges do not store the nodes in order!
        # Have to keep a dictionary lookup to make sure the nodes
        # are referenced properly (particularly across periodic images)
        self.sorted_edge_dict = {}
        self.molecule_images = []


[docs]    def nodes_iter2(self, data=True):
        #FIXME(pboyd): latest version of NetworkX has removed nodes_iter...
        """Oh man, fixing to networkx 2.0

        This probably breaks a lot of stuff in the code. THANKS NETWORKX!!!!!!!1
	Extensive testing under way...

        """
        for node in self.nodes():
            if(data):
                data = self.node[node]
                yield (node, data)
            else:
                yield node

[docs]    def edges_iter2(self, data=True):
        for (n1,n2) in self.edges():
            v1,v2 = self.sorted_edge_dict[(n1,n2)]
            #d=self.edges[(n1,n2)]
            d=self[n1][n2]
            if(data):
                yield (v1,v2,d)
            else:
                yield (v1,v2)

        #for n1, n2, d in self.edges_iter(**kwargs):
        #    yield (self.sorted_edge_dict[(n1, n2)][0], self.sorted_edge_dict[(n1,n2)][1], d)

[docs]    def count_dihedrals(self):
        count = 0
        for n1, n2, data in self.edges_iter2(data=True):
            try:
                for dihed in data['dihedrals'].keys():
                    count += 1
            except KeyError:
                pass
        return count

[docs]    def count_angles(self):
        count = 0
        for node, data in self.nodes_iter2(data=True):
            try:
                for angle in data['angles'].keys():
                    count += 1
            except KeyError:
                pass
        return count

[docs]    def count_impropers(self):
        count = 0
        for node, data in self.nodes_iter2(data=True):
            try:
                for angle in data['impropers'].keys():
                    count += 1
            except KeyError:
                pass
        return count

[docs]    def reorder_labels(self, reorder_dic):
        """Re-order the labels of the nodes so that LAMMPS doesn't complain.
        This issue only arises when a supercell is built, but isolated molecules
        are not replicated in the supercell (by user request).
        This creates a discontinuity in the indices of the atoms, which breaks
        some features in LAMMPS.

        """

        old_nodes = sorted([(i,self.node[i]) for i in self.nodes()])
        #old_nodes = list(self.nodes_iter2(data=True))
        old_edges = list(self.edges_iter2(data=True))
        for node, data in old_nodes:

            if 'angles' in data:
                ang_data = list(data['angles'].items())
                for (a,c), val in ang_data:
                    data['angles'].pop((a,c))
                    data['angles'][(reorder_dic[a], reorder_dic[c])] = val
            if 'impropers' in data:
                imp_data = list(data['impropers'].items())
                for (a, c, d), val in imp_data:
                    data['impropers'].pop((a,c,d))
                    data['impropers'][(reorder_dic[a], reorder_dic[c], reorder_dic[d])] = val

            self.remove_node(node)
            data['index'] = reorder_dic[node]
            self.add_node(reorder_dic[node], **data)

        for b, c, data in old_edges:
            if 'dihedrals' in data:
                dihed_data = list(data['dihedrals'].items())
                for (a, d), val in dihed_data:
                    data['dihedrals'].pop((a,d))
                    data['dihedrals'][(reorder_dic[a], reorder_dic[d])] = val
            try:
                self.remove_edge(b,c)
            except nx.exception.NetworkXError:
                # edge already removed from 'remove_node' above
                pass
            self.add_edge(reorder_dic[b], reorder_dic[c], **data)

        old_edge_dict = self.sorted_edge_dict.items()
        self.sorted_edge_dict = {}
        for (a,b), val in old_edge_dict:
            self.sorted_edge_dict[(reorder_dic[a], reorder_dic[b])] = (reorder_dic[val[0]], reorder_dic[val[1]])

        old_images = self.molecule_images[:]
        self.molecule_images = []
        for m in old_images:
            newm = [reorder_dic[i] for i in m]
            self.molecule_images.append(newm)

[docs]    def add_atomic_node(self, **kwargs):
        """Insert nodes into the graph from the cif file"""
        #update keywords with more atom info
        # rename this to something more intuitive
        label="_atom_site_type_symbol"
        orig_keys = list(kwargs.keys())
        if(label not in kwargs):
            label = "_atom_site_label"
            if (label not in kwargs):
                print("ERROR: could not find the keyword for the element types in the cif file!"+
                        " Please use '_atom_site_type_symbol' or '_atom_site_label' for the element"+
                        " column.")
                sys.exit()

        charge_keywords = ["_atom_type_partial_charge",
                           "_atom_type_parital_charge",
                           "_atom_type_charge",
                           "_atom_site_charge" # RASPA cif file
                           ]
        element = kwargs.pop(label)

        # replacing Atom.__init__
        kwargs.update({'mass':MASS[element]})
        kwargs.update({'molid':self.molecule_id})
        kwargs.update({'element':element})
        kwargs.update({'cycle':False})
        kwargs.update({'rings':[]})
        kwargs.update({'atomic_number':ATOMIC_NUMBER.index(element)})
        kwargs.update({'pair_potential':None})
        kwargs.update({'h_bond_donor':False})
        kwargs.update({'h_bond_potential':None})
        kwargs.update({'tabulated_potential':False})
        kwargs.update({'table_potential':None})
        if set(orig_keys) & set(charge_keywords):
            key = list(set(orig_keys)&set(charge_keywords))[0]
            try:
                kwargs['charge'] = float(kwargs[key])
            except ValueError:
                print("Warning %s could not be converted "%(kwargs[key]) +
                      "to a charge value for atom %s"%(element) +
                      ", setting charge as 0.0 for this atom")
                kwargs['charge'] = 0.0
        else:
            kwargs['charge'] = 0.0
        try:
            fftype = kwargs.pop('_atom_site_description')
        except KeyError:
            fftype = None

        kwargs.update({'force_field_type':fftype})
        idx = self.number_of_nodes() + 1
        kwargs.update({'index':idx})
        #TODO(pboyd) should have some error checking here..
        try:
            n = kwargs.pop('_atom_site_label')
        except KeyError:
            n = label
        kwargs.update({'ciflabel':n})
        # to identify Cu paddlewheels, etc.
        #kwargs.update({'special_flag':None})
        self.add_node(idx, **kwargs)

[docs]    def compute_bonding(self, cell, scale_factor = 0.9):
        """Computes bonds between atoms based on covalent radii."""
        # here assume bonds exist, populate data with lengths and
        # symflags if needed.
        if (self.number_of_edges() > 0):
            # bonding found in cif file
            sf = []
            for n1, n2, data in self.edges_iter2(data=True):
                # get data['ciflabel'] for self.node[n1] and self.node[n2]
                # update the sorted_edge_dict with the indices, not the
                # cif labels
                n1data = self.node[n1]
                n2data = self.node[n2]
                n1label = n1data['ciflabel']
                n2label = n2data['ciflabel']
                try:
                    nn1, nn2 = self.sorted_edge_dict.pop((n1label, n2label))
                    if nn2 == n1label:
                        nn1 = n2
                        nn2 = n1
                    self.sorted_edge_dict.update({(n1, n2):(nn1, nn2)})
                    self.sorted_edge_dict.update({(n2, n1):(nn1, nn2)})
                except KeyError:
                    pass
                try:
                    nn1, nn2 = self.sorted_edge_dict.pop((n2label, n1label))
                    if nn2 == n1label:
                        nn1 = n2
                        nn2 = n1
                    self.sorted_edge_dict.update({(n2, n1):(nn1, nn2)})
                    self.sorted_edge_dict.update({(n1, n2):(nn1, nn2)})
                except KeyError:
                    pass

                sf.append(data['symflag'])
                bl = data['length']
                if bl <= 0.01:
                    id1, id2 = self.node[n1]['index']-1, self.node[n2]['index']-1
                    dist = self.distance_matrix[id1,id2]
                    data['length'] = dist

            if (set(sf) == set(['.'])):
                # compute sym flags
                for n1, n2, data in self.edges_iter2(data=True):
                    flag = self.compute_bond_image_flag(n1, n2, cell)
                    data['symflag'] = flag
            return

        # Here we will determine bonding from all atom pairs using
        # covalent radii.
        for n1, n2 in itertools.combinations(self.nodes(), 2):
            node1, node2 = self.node[n1], self.node[n2]
            e1, e2 = node1['element'],\
                    node2['element']
            elements = set([e1, e2])
            i1,i2 = node1['index']-1, node2['index']-1
            rad = (COVALENT_RADII[e1] + COVALENT_RADII[e2])
            dist = self.distance_matrix[i1,i2]
            tempsf = scale_factor
            # probably a better way to fix these kinds of issues..
            if (set("F") < elements) and  (elements & metals):
                tempsf = 0.8

            if (set("O") < elements) and (elements & metals):
                tempsf = 0.85
            # fix for water particle recognition.
            if(set(["O", "H"]) <= elements):
                tempsf = 0.8
            # fix for M-NDISA MOFs 
            if(set(["O", "C"]) <= elements):
                tempsf = 0.8
            if (set("O") < elements) and (elements & metals):
                tempsf = 0.82
                                                            
            # very specific fix for Michelle's amine appended MOF
            if(set(["N","H"]) <= elements):
                tempsf = 0.67
            if(set(["Mg","N"]) <= elements):
                tempsf = 0.80
            if(set(["C","H"]) <= elements):
                tempsf = 0.80
            if dist*tempsf < rad and not (alkali & elements):

                flag = self.compute_bond_image_flag(n1, n2, cell)
                self.sorted_edge_dict.update({(n1,n2): (n1, n2), (n2, n1):(n1, n2)})
                self.add_edge(n1, n2, key=self.number_of_edges() + 1,
                              order=1.0,
                              weight=1,
                              length=dist,
                              symflag = flag,
                              potential = None
                              )
    #TODO(pboyd) update this
[docs]    def compute_bond_image_flag(self, n1, n2, cell):
        """Update bonds to contain bond type, distances, and min img
        shift."""
        supercells = np.array(list(itertools.product((-1, 0, 1), repeat=3)))
        unit_repr = np.array([5,5,5], dtype=int)
        atom1 = self.node[n1]
        atom2 = self.node[n2]
        #coord1 = self.coordinates[atom1['index']-1]
        #coord2 = self.coordinates[atom2['index']-1]
        coord1 = self.node[n1]['cartesian_coordinates']
        coord2 = self.node[n2]['cartesian_coordinates']
        fcoords = np.dot(cell._inverse, coord2) + supercells

        coords = np.array([np.dot(j, cell.cell) for j in fcoords])

        dists = distance.cdist([coord1], coords)
        dists = dists[0].tolist()
        image = dists.index(min(dists))
        dist = min(dists)
        sym = '.' if all([i==0 for i in supercells[image]]) else \
                "1_%i%i%i"%(tuple(np.array(supercells[image],dtype=int) +
                                  unit_repr))
        if(dist > 7):
            #print("BEFORE: ", self[n1][n2]['symflag'])
            print("WARNING: bonded atoms %i and %i are %.3f Angstroms apart."%(n1,n2,dist) +
                    " This probably has something to do with the redefinition of the unitcell "+
                    "to a supercell. Please contact the developers!")
        return sym

[docs]    def compute_angle_between(self, l, m, r):
        coordl = self.node[l]['cartesian_coordinates']
        coordm = self.node[m]['cartesian_coordinates']
        coordr = self.node[r]['cartesian_coordinates']

        try:
            v1 = self.min_img(coordl - coordm)
            v2 = self.min_img(coordr - coordm)
        except AttributeError:
            v1 = coordl - coordm
            v2 = coordr - coordm

        v1 /= np.linalg.norm(v1)
        v2 /= np.linalg.norm(v2)

        a = np.arccos(np.dot(v1, v2))
        if np.isnan(a):
            if np.allclose((v1 + v2),np.zeros(3)):
                a = 180
            else:
                a = 0

        angle = a / DEG2RAD
        return angle

[docs]    def coplanar(self, node):
        """ Determine if this node, and it's neighbors are
        all co-planar.

        """
        coord = self.node[node]['cartesian_coordinates']
        vects = []
        for j in self.neighbors(node):
            vects.append(self.node[j]['cartesian_coordinates'] - coord)

        # use the first two vectors to define a plane
        v1 = vects[0]
        v2 = vects[1]
        n = np.cross(v1, v2)
        n /= np.linalg.norm(n)
        for v in vects[2:]:
            v /= np.linalg.norm(v)
            # what is a good tolerance for co-planarity in MOFs?
            # this is used solely to determine if a 4-coordinated metal atom
            # is square planar or tetrahedral..
            if not np.allclose(np.dot(v,n), 0., atol=0.02):
                return False
        return True

[docs]    def compute_dihedral_between(self, a, b, c, d):
        coorda = self.node[a]['cartesian_coordinates']
        coordb = self.node[b]['cartesian_coordinates']
        coordc = self.node[c]['cartesian_coordinates']
        coordd = self.node[d]['cartesian_coordinates']

        v1 = self.min_img(coorda - coordb)
        v2 = self.min_img(coordc - coordb)
        v3 = self.min_img(coordb - coordc)
        v4 = self.min_img(coordd - coordc)

        n1 = np.cross(v1, v2)
        n2 = np.cross(v3, v4)

        n1 /= np.linalg.norm(n1)
        n2 /= np.linalg.norm(n2)

        a = np.arccos(np.dot(n1, n2))
        if np.isnan(a):
            a = 0
        angle = a / DEG2RAD
        return angle

[docs]    def add_bond_edge(self, **kwargs):
        """Add bond edges (weight factor = 1)"""
        #TODO(pboyd) should figure out if there are other cif keywords to identify
        # atom types
        #TODO(pboyd) this is .cif specific and should be contained within the cif
        # file reading portion of the code. This is so that other file formats
        # can eventually be adopted if need be.

        n1 = kwargs.pop('_geom_bond_atom_site_label_1')
        n2 = kwargs.pop('_geom_bond_atom_site_label_2')
        try:
            length = float(del_parenth(kwargs.pop('_geom_bond_distance')))
        except KeyError:
            length = 0.0

        try:
            order = CCDC_BOND_ORDERS[kwargs['_ccdc_geom_bond_type']]
        except KeyError:
            order = 1.0

        try:
            flag = kwargs.pop('_geom_bond_site_symmetry_2')
        except KeyError:
            # assume bond does not straddle a periodic boundary
            flag = '.'
        kwargs.update({'length':length})
        kwargs.update({'weight': 1})
        kwargs.update({'order': order})
        kwargs.update({'symflag': flag})
        kwargs.update({'potential': None})
        # get the node index to avoid headaches
        for k,data in self.nodes_iter2(data=True):
            if data['ciflabel'] == n1:
                n1 = k
            elif data['ciflabel'] == n2:
                n2 =k

        self.sorted_edge_dict.update({(n1,n2): (n1, n2), (n2, n1):(n1, n2)})
        self.add_edge(n1, n2, key=self.number_of_edges()+1, **kwargs)

[docs]    def compute_cartesian_coordinates(self, cell):
        """Compute the cartesian coordinates for each atom node"""
        coord_keys = ['_atom_site_x', '_atom_site_y', '_atom_site_z']
        fcoord_keys = ['_atom_site_fract_x', '_atom_site_fract_y', '_atom_site_fract_z']
        self.coordinates = np.empty((self.number_of_nodes(), 3))
        for node, data in self.nodes_iter2(data=True):
            #TODO(pboyd) probably need more error checking..
            try:
                coordinates = np.array([float(del_parenth(data[i])) for i in coord_keys])
            except KeyError:
                coordinates = np.array([float(del_parenth(data[i])) for i in fcoord_keys])
                coordinates = np.dot(coordinates, cell.cell)
            data.update({'cartesian_coordinates':coordinates})

            self.coordinates[data['index']-1] = coordinates

[docs]    def compute_min_img_distances(self, cell):
        self.distance_matrix = np.empty((self.number_of_nodes(), self.number_of_nodes()))
        for n1, n2 in itertools.combinations(self.nodes(), 2):
            id1, id2 = self.node[n1]['index']-1,\
                                self.node[n2]['index']-1
            #coords1, coords2 = self.coordinates[id1], self.coordinates[id2]
            coords1, coords2 = self.node[n1]['cartesian_coordinates'], self.node[n2]['cartesian_coordinates']
            try:
                dist = self.min_img_distance(coords1, coords2, cell)
            except TypeError:
                sys.exit()
            self.distance_matrix[id1][id2] = dist
            self.distance_matrix[id2][id1] = dist

[docs]    def min_img(self, coord):
        f = np.dot(self.cell.inverse, coord)
        f -= np.around(f)
        return np.dot(f, self.cell.cell)

[docs]    def in_cell(self, coord):
        f = np.dot(self.cell.inverse, coord) % 1
        return np.dot(f, self.cell.cell)

[docs]    def fractional(self, coord):
        f = np.dot(self.cell.inverse, coord)
        return f

[docs]    def min_img_distance(self, coords1, coords2, cell):
        one = np.dot(cell.inverse, coords1) % 1
        two = np.dot(cell.inverse, coords2) % 1
        three = np.around(one - two)
        four = np.dot(one - two - three, cell.cell)
        return np.linalg.norm(four)

[docs]    def compute_init_typing(self):
        """Find possible rings in the structure and
        initialize the hybridization for each atom.
        More refined determinations of atom and bond types
        is computed below in compute_bond_typing

        """
        #TODO(pboyd) return if atoms already 'typed' in the .cif file
        # compute and store cycles
        cycles = []
        for node, data in self.nodes_iter2(data=True):
            for n in self.neighbors(node):
                # fastest way I could think of..
                edge = self[node][n].copy()
                self.remove_edge(node, n)
                cycle = []
                try:
                    # cycle = list(nx.all_shortest_paths(self, node, n))
                    cycle = list(nx.all_simple_paths(self, source=node, target=n, cutoff=10))
                    if not cycle==[]: # storing all path that have the length of the shortest path of the graph
                        cycle_shortest_path = min(map(len, cycle))
                        cycle = [cyc for cyc in cycle if len(cyc) == cycle_shortest_path]
                except nx.exception.NetworkXNoPath:
                    pass
                self.add_edge(node, n, **edge)
                #FIXME MW edit to only store cycles < len(10)
                # should be a harmless edit but maybe need to test
                if(len(cycle) <= 10):
                    cycles += cycle
        for label, data in self.nodes_iter2(data=True):
            # N O C S
            neighbours = self.neighbors(label)
            element = data['element']
            if element == "C":
                if self.degree(label) >= 4:
                    self.node[label].update({'hybridization':'sp3'})
                elif self.degree(label) == 3:
                    self.node[label].update({'hybridization':'sp2'})
                elif self.degree(label) <= 2:
                    self.node[label].update({'hybridization':'sp'})
            elif element == "N":
                if self.degree(label) >= 3:
                    self.node[label].update({'hybridization':'sp3'})
                elif self.degree(label) == 2:
                    self.node[label].update({'hybridization':'sp2'})
                elif self.degree(label) == 1:
                    self.node[label].update({'hybridization':'sp'})
                else:
                    self.node[label].update({'hybridization':'sp3'})
            elif element == "O":
                n_elems = set([self.node[k]['element'] for k in neighbours])
                if self.degree(label) >= 2:
                    # if O is bonded to a metal, assume sp2 - like ...
                    # there's probably many cases where this fails,
                    # but carboxylate groups, bridging hydroxy groups
                    # make this true.
                    if (n_elems <= metals):
                        self.node[label].update({'hybridization': 'sp2'})
                    else:
                        self.node[label].update({'hybridization':'sp3'})
                elif self.degree(label) == 1:
                    self.node[label].update({'hybridization':'sp2'})
                else:
                    # If it has no neighbours, just give it SP3
                    self.node[label].update({'hybridization':'sp3'})
            elif element == "S":
                if self.degree(label) >= 2:
                    self.node[label].update({'hybridization':'sp3'})
                elif self.degree(label) == 1:
                    self.node[label].update({'hybridization':'sp2'})
                else:
                    self.node[label].update({'hybridization':'sp3'})

            else:
                #default sp3
                self.node[label].update({'hybridization':'sp3'})
        # convert to aromatic
        # probably not a good test for aromaticity..
        arom = set(["C", "N", "O", "S"])
        for cycle in cycles:
            elements = [self.node[k]['element'] for k in cycle]
            neigh = [self.degree(k) for k in cycle]
            if np.all(np.array(neigh) <= 3) and set(elements) <= arom:
                for a in cycle:
                    self.node[a]['hybridization'] = 'aromatic'
                    self.node[a]['cycle'] = True
                    self.node[a]['rings'].append(cycle)

[docs]    def compute_bond_typing(self):
        """ Compute bond types and atom types based on the local edge
        environment.
        Messy, loads of 'ifs'
        is there a better way to catch chemical features?
        """
        #TODO(pboyd) return if bonds already 'typed' in the .cif file
        double_check = []
        for n1, n2, data in self.edges_iter2(data=True):
            elements = [self.node[a]['element'] for a in (n1,n2)]
            hybridization = [self.node[a]['hybridization'] for a in (n1, n2)]
            rings = [self.node[a]['rings'] for a in (n1, n2)]
            samering = False

            if set(hybridization) == set(['aromatic']):
                for r in rings[0]:
                    if n2 in r:
                        samering = True
                if(samering):
                    data.update({"order" : 1.5})

            if set(elements) == set(["C", "O"]):
                car = n1 if self.node[n1]['element'] == "C" else n2
                car_data = self.node[car]
                oxy = n2 if self.node[n2]['element'] == "O" else n1
                oxy_data = self.node[oxy]

                carnn = [i for i in self.neighbors(car) if i != oxy]
                try:
                    carnelem = [self.node[j]['element'] for j in carnn]
                except:
                    carnelem = []

                oxynn = [i for i in self.neighbors(oxy) if i != car]
                try:
                    oxynelem = [self.node[j]['element'] for j in oxynn]
                except:
                    oxynelem = []
                if "O" in carnelem:
                    at = carnn[carnelem.index("O")]
                    at_data = self.node[at]
                    if self.degree(at) == 1:
                        if self.degree(oxy) == 1:
                            #CO2
                            car_data['hybridization'] = 'sp'
                            oxy_data['hybridization'] = 'sp2'
                            data['order'] = 2.
                        else:
                            # ester
                            if set(oxynelem) <= organic:
                                car_data['hybridization'] = 'sp2'
                                oxy_data['hybridization'] = 'sp2'
                                data['order'] = 1 # this is the ether part of an ester...
                            #carboxylate?
                            else:
                                car_data['hybridization'] = 'aromatic'
                                oxy_data['hybridization']= 'aromatic'
                                data['order'] = 1.5

                    else:
                        atnelem = [self.node[k]['element'] for k in self.neighbors(at)]
                        if (set(atnelem) <= organic):
                            # ester
                            if len(oxynn) == 0:
                                car_data['hybridization'] = 'sp2'
                                oxy_data['hybridization'] = 'sp2'
                                data['order'] = 2. # carbonyl part of ester
                            # some kind of resonance structure?
                            else:
                                car_data['hybridization'] = 'aromatic'
                                oxy_data['hybridization'] = 'aromatic'
                                data['order'] = 1.5
                        else:
                            car_data['hybridization'] = 'aromatic'
                            oxy_data['hybridization'] = 'aromatic'
                            data['order'] = 1.5

                if "N" in carnelem:
                    at = carnn[carnelem.index("N")]
                    # C=O of amide group
                    if self.degree(oxy) == 1:
                        data['order'] = 1.5
                        car_data['hybridization'] = 'aromatic'
                        oxy_data['hybridization'] = 'aromatic'
                # only one carbon oxygen connection.. could be C=O, R-C-O-R, R-C=O-R
                if (not "O" in carnelem) and (not "N" in carnelem):
                    if len(oxynn) > 0:
                        # ether
                        oxy_data['hybridization'] = 'sp3'
                        data['order'] = 1.0
                    else:
                        if car_data['cycle'] and car_data['hybridization'] == 'aromatic':
                            oxy_data['hybridization'] = 'aromatic'
                            data['order'] = 1.5
                        # carbonyl
                        else:
                            oxy_data['hybridization'] = 'sp2'
                            data['order'] = 2.0
            elif set(elements) == set(["C", "N"]) and not samering:
                car = n1 if self.node[n1]['element'] == "C" else n2
                car_data = self.node[car]
                nit = n2 if self.node[n2]['element'] == "N" else n1
                nit_data = self.node[nit]
                carnn = [j for j in self.neighbors(car) if j != nit]
                carnelem = [self.node[k]['element'] for k in carnn]
                nitnn = [j for j in self.neighbors(nit) if j != car]
                nitnelem = [self.node[k]['element'] for k in nitnn]
                # aromatic amine connected -- assume part of delocalized system
                if car_data['hybridization'] == 'aromatic' and set(['H']) == set(nitnelem):
                    data['order'] = 1.5
                    nit_data['hybridization'] = 'aromatic'
                # amide?
                elif self.degree(car) == 3 and len(nitnn) >=2:
                    if "O" in carnelem:
                        data['order'] = 1.5 # (amide)
                        nit_data['hybridization'] = 'aromatic'
                    # nitro
                    if set(nitnelem) == set(["O"]):
                        data['order'] = 1.
                        nit_data['hybridization'] = 'aromatic'
                        for oatom in nitnn:
                            nobond = self[nit][oatom]['order'] = 1.5
                            self.node[oatom]['hybridization'] = 'aromatic'

            elif (not self.node[n1]['cycle']) and (not self.node[n2]['cycle']) and (set(elements) <= organic):
                if set(hybridization) == set(['sp2']):
                    try:
                        cr1 = COVALENT_RADII['%s_2'%elements[0]]
                    except KeyError:
                        cr1 = COVALENT_RADII[elements[0]]
                    try:
                        cr2 = COVALENT_RADII['%s_2'%(elements[1])]
                    except KeyError:
                        cr2 = COVALENT_RADII[elements[1]]
                    covrad = cr1 + cr2
                    # first pass: assign all to 2.0 bond order
                    data['order'] = 2.0
                    double_check += [n1, n2]
                    #if (data['length'] <= covrad*.95):
                    #    data['order'] = 2.0
                elif set(hybridization) == set(['sp']):
                    try:
                        cr1 = COVALENT_RADII['%s_1'%elements[0]]
                    except KeyError:
                        cr1 = COVALENT_RADII[elements[0]]
                    try:
                        cr2 = COVALENT_RADII['%s_1'%elements[1]]
                    except KeyError:
                        cr2 = COVALENT_RADII[elements[1]]
                    # first pass: assign all to 3.0 bond order
                    double_check += [n1, n2]
                    data['order'] = 3.0
                    #covrad = cr1 + cr2
                    #if (data['length'] <= covrad*.95):
                    #    data['order'] = 3.0
        # second pass, check organic unsaturated bonds to make
        # sure alkyl chains are alternating etc.
        while double_check:
            n = double_check.pop()
            # rewind this atom to a 'terminal' connected atom
            for i in self.recurse_bonds_to_end(n, pool=[], visited=[]):
                start = i
                try:
                    idn = double_check.index(i)
                    del double_check[idn]
                except ValueError:
                    pass
            # iterate over all linear chains
            # BE CAREFUL about overwriting bond orders here, the recursion
            # can have duplicate bonds for each 'k' iteration since it iterates over
            # all possible linear chains. So if the molecule is branched, there
            # will be multiple recursions over the same set of bonds.
            for k in self.recurse_linear_chains(start, visited=[], excluded=[]):
                bond_orders = []
                # first pass, store all the bond orders
                for idx in range(len(k)-1):
                    n1 = k[idx]
                    n2 = k[idx+1]
                    bond = self[n1][n2]
                    bond_orders.append(bond['order'])
                # second pass, check continuity
                for idx in range(len(k)-1):
                    n1 = k[idx]
                    n2 = k[idx+1]
                    data1 = self.node[n1]
                    data2 = self.node[n2]

                    hyb1 = data1['hybridization']
                    hyb2 = data2['hybridization']

                    elem1 = data1['element']
                    elem2 = data2['element']
                    if(idx == 0):
                        order = bond_orders[idx]
                        if (len(bond_orders) > idx+1):
                            next_order = bond_orders[idx+1]
                        if (hyb1 == 'sp2') and (hyb2 == 'sp2'):
                            bond_orders[idx] = 2.
                    elif (idx < len(k)-2):
                        prev_order = bond_orders[idx-1]
                        next_order = bond_orders[idx+1]
                        order = bond_orders[idx]
                        if (hyb1 == 'sp2') and (hyb2 == 'sp2'):
                            if (prev_order == 2.) and (next_order == 1):
                                bond_orders[idx-1] = 1.5
                                bond_orders[idx] = 1.5
                            elif (prev_order == 2.) and (next_order == 2.):
                                bond_orders[idx] = 1.
                        elif (hyb1 == 'sp') and (hyb2 == 'sp'):
                            if (prev_order == 3.) and (next_order == 3.):
                                bond_orders[idx] = 1.
                    else:
                        prev_order = bond_orders[idx-1]
                        order = bond_orders[idx]
                        if (hyb1 == 'sp2') and (hyb2 == 'sp2'):
                            if (prev_order == 2.) and (order == 2):
                                if set([elem1, elem2]) == set(["C", "O"]):
                                    onode = n2 if self.node[n2]['element'] == "O" else n1
                                    # this very specific case is a enol
                                    if self.degree(onode) == 1:
                                        bond_orders[idx] = 1.5
                                        bond_orders[idx-1] = 1.5
                        elif (hyb1 == 'sp') and (hyb2 == 'sp'):
                            if (prev_order == 3.):
                                bond_orders[idx] = 1.

                for idx in range(len(k)-1):
                    n1 = k[idx]
                    n2 = k[idx+1]
                    bond = self[n1][n2]
                    # update bond orders.
                    bond['order'] = bond_orders[idx]

                #print([self.node[r]['element'] for r in k])

[docs]    def recurse_linear_chains(self, node, visited=[], excluded=[]):
        """Messy recursion function to return all unique chains from a set of atoms between two
        metals (or terminal atoms in the case of molecules)"""
        if self.node[node]['element'] == 'H':
            yield
        neighbors = [i for i in self.neighbors(node) if i not in excluded and self.node[i]['element'] != "H"]
        if (not neighbors) and (node in excluded) and (not visited):
            return
        elif (not neighbors) and (node in excluded):
            nde = visited.pop()
        elif (not neighbors) and (not (node in excluded)):
            excluded.append(node)
            visited.append(node)
            yield visited
            nde = visited.pop()
        else:
            excluded.append(node)
            visited.append(node)
            nde = neighbors[0]
        for x in self.recurse_linear_chains(nde, visited, excluded):
            yield x

[docs]    def recurse_bonds_to_end(self, node, pool=[], visited=[]):
        if self.node[node]['element'] == 'H':
            return
        visited.append(node)
        neighbors = [i for i in self.neighbors(node) if i not in visited and self.node[i]['element'] != "H"]
        pool += neighbors
        yield node
        if (not pool) or (self.node[node]['element'] in list(metals)):
            return
        for x in self.recurse_bonds_to_end(pool[0], pool[1:], visited):
            yield x

[docs]    def atomic_node_sanity_check(self):
        """Check for specific keyword/value pairs. Exit if non-existent"""

[docs]    def compute_angles(self):
        """angles are attached to specific nodes, this way
        if a node is cut out of a graph, the angle comes with it.


               b-----a
              /
             /
            c

        Must be updated with different adjacent nodes if a
        supercell is requested, and the angle crosses a
        periodic image.

        """
        for b, data in self.nodes_iter2(data=True):
            if self.degree(b) < 2:
                continue
            angles = itertools.combinations(self.neighbors(b), 2)
            for (a, c) in angles:
                data.setdefault('angles', {}).update({(a,c):{'potential':None}})

[docs]    def compute_dihedrals(self):
        """Dihedrals are attached to specific edges in the graph.
           a
            \
             b -- c
                   \
                    d

        the edge between b and c will contain all possible dihedral
        angles between the neighbours of b and c (this includes a
        and d and other possible bonded atoms)

        """
        for b, c, data in self.edges_iter2(data=True):
            b_neighbours = [k for k in self.neighbors(b) if k != c]
            c_neighbours = [k for k in self.neighbors(c) if k != b]
            for a in b_neighbours:
                for d in c_neighbours:
                    data.setdefault('dihedrals',{}).update({(a, d):{'potential':None}})

[docs]    def compute_improper_dihedrals(self):
        """Improper Dihedrals are attached to specific nodes in the graph.
           a
            \
             b -- c
             |
             d

        the node b will contain all possible improper dihedral
        angles between the neighbours of b

        """
        for b, data in self.nodes_iter2(data=True):
            if self.degree(b) != 3:
                continue
            # three improper torsion angles about each atom
            local_impropers = list(itertools.permutations(self.neighbors(b)))
            for idx in range(0, 6, 2):
                (a, c, d) = local_impropers[idx]
                data.setdefault('impropers',{}).update({(a,c,d):{'potential':None}})

[docs]    def compute_topology_information(self, cell, tol, num_neighbours):
        self.compute_cartesian_coordinates(cell)
        self.compute_min_img_distances(cell)
        self.compute_bonding(cell)
        self.compute_init_typing()
        self.compute_bond_typing()
        if (self.find_metal_sbus):
            self.detect_clusters(num_neighbours, tol) # num neighbors determines how many nodes from the metal element to cut out for comparison
        if (self.find_organic_sbus):
            self.detect_clusters(num_neighbours, tol,  type="Organic")
        self.compute_angles()
        self.compute_dihedrals()
        self.compute_improper_dihedrals()

[docs]    def sorted_node_list(self):
        return [n[1] for n in sorted([(data['index'], node) for node, data in self.nodes_iter2(data=True)])]

[docs]    def sorted_edge_list(self):
        return [e[1] for e in sorted([(data['index'], (n1, n2)) for n1, n2, data in self.edges_iter2(data=True)])]

[docs]    def show(self):
        nx.draw(self)

[docs]    def img_offset(self, cells, cell, maxcell, flag, redefine, n1=0):
        unit_repr = np.array([5, 5, 5], dtype=int)
        if(flag == '.'):
            return cells.index(tuple([tuple([i]) for i in cell]))
        translation = np.array([int(j) for j in flag[2:]]) - unit_repr
        ocell = np.array(cell + translation, dtype=np.float64)

        # have to find the off-diagonal values, and what their
        # multiples are to determine the image cell of this
        # bond.
        imgcell = ocell % maxcell
        if redefine is None:
            return cells.index(tuple([tuple([i]) for i in imgcell]))

        olde_imgcell = imgcell
        newcell = cell + np.dot(cell, redefine)%maxcell
        newocell = (newcell + translation) #% maxcell
        rd2 = redefine - np.identity(3)*maxcell
        # check if the newcell translation spans a periodic boundary
        if (np.any(newocell >= maxcell, axis=0)) or (np.any(newocell < 0.)):

            # get indices of where the newocell is spanning a pbc
            indexes = np.where(newocell - maxcell >= 0)[0].tolist() + np.where(newocell < 0)[0].tolist()

            # determine if the translation spans a lattice vector which is a linear combination
            # of the other vectors, in which case, make a correction in the image cell
            # this is checked by finding which lattice indices are spanning the cell, then
            # checking if these vectors are a linear combination of the other ones by
            # summing the columns together.
            # this ONLY works if b and c are lin combs of a and a is the unit vector
            newrd = np.sum(rd2[indexes], axis=0)%maxcell
            #newrd = np.dot(newocell, redefine)%maxcell
            if np.any(newrd != 0):
                imgcell = (newocell - np.dot(newocell, redefine)) % maxcell

        return cells.index(tuple([tuple([i]) for i in imgcell]))

[docs]    def update_symflag(self, cell, symflag, mincell, maxcell):
        unit_repr = np.array([5, 5, 5], dtype=int)
        ocell = cell + np.array([int(j) for j in symflag[2:]]) - unit_repr
        imgcell = ocell % maxcell
        if any(ocell < mincell) or any(ocell >= maxcell):
            newflaga = np.array([5,5,5])
            newflaga[np.where(ocell >= maxcell)] = 6
            newflaga[np.where(ocell < np.zeros(3))] = 4
            newflag = "1_%i%i%i"%(tuple(newflaga))

        else:
            newflag = '.'
        return newflag

[docs]    def correspondence_graph(self, graph, tol, general_metal=False, node_subset=None):
        """Generate a correspondence graph between the nodes
        and the SBU.
        tolerance is the distance tolerance for the edge generation
        in the correspondence graph.

        """
        if node_subset is None:
            node_subset = self.nodes()
        graph_nodes = graph.nodes()
        cg = nx.Graph()
        # add nodes to cg
        for (i, j) in itertools.product(node_subset, graph_nodes):
            # match element-wise
            elementi = self.node[i]['element']
            elementj = graph.node[j]['element']
            match = False
            if general_metal:
                if ATOMIC_NUMBER.index(elementi) in METALS and ATOMIC_NUMBER.index(elementj) in METALS:
                    match = True
                elif elementi == elementj:
                    match = True
            elif elementi == elementj:
                match = True
            if match:
                cg.add_node((i,j))
        # add edges to cg
        for (a1, b1), (a2, b2) in itertools.combinations(cg.nodes(), 2):
            if (a1 != a2) and (b1 != b2):
                da = self.distance_matrix[a1-1, a2-1]
                db = graph.distance_matrix[b1-1, b2-1]
                if np.allclose(da, db, atol=tol):
                    cg.add_edge((a1,b1), (a2,b2))
        return cg

[docs]    def detect_clusters(self, num_neighbors, tol, type='Inorganic', general_metal=False):
        """Detect clusters such as the copper paddlewheel using
        maximum clique detection. This will assign specific atoms
        with a special flag for use when building their force field.

        setting general_metal to True will allow for cluster recognition of
        inorganic SBUs while ignoring the specific element type of the metal,
        so long as it is a metal, it will be paired with other metals.
        This may increase the time for SBU recognition.

        """
        print("Detecting %s clusters"%type)

        reference_nodes = []

        if type=="Inorganic":
            types = InorganicCluster.keys()
            ref_sbus = InorganicCluster
            store_sbus = self.inorganic_sbus
        elif type == "Organic":
            types = OrganicCluster.keys()
            ref_sbus = OrganicCluster
            store_sbus = self.organic_sbus

        for node, data in self.nodes_iter2(data=True):
            if (type=='Inorganic') and (general_metal) and (data['atomic_number'] in METALS)\
                    and ('special' not in data.keys()): # special means that this atom has already been found in a previous clique detection
                reference_nodes.append(node)

            elif (data['element'] in types) and ('special' not in data.keys()):
                reference_nodes.append(node)

        no_cluster = []
        #FIXME(pboyd): This routine doesn't work for finding 1-D rod SBUs.
        # At each step the atoms found that belong to an SBU are deleted
        # to improve the search efficiency. In 1-D rod SBUs there are
        # repeating elements in the 'discretized' version used to discover
        # the rod in a MOF, so in some cases only a fragment of the rod
        # is found. This results in the wrong force field types being
        # assigned to these atoms (UFF4MOF).
        for node in reference_nodes:
        #while reference_nodes:
            #node = reference_nodes.pop()
            data = self.node[node]
            possible_clusters = {}
            toln = tol
            if type=="Inorganic" and general_metal:
                for j in ref_sbus.keys():
                    possible_clusters.update(ref_sbus[j])
            else:
                possible_clusters.update(ref_sbus[data['element']])
            try:
                neighbour_nodes = []
                instanced_neighbours = self.neighbors(node)
                if (data['element'] == "C"):
                    chk_neighbors = num_neighbors # organic clusters can be much bigger.
                else:
                    chk_neighbors = num_neighbors
                # tree-like spanning of original node
                for j in range(chk_neighbors):
                    temp_neighbours = []
                    for n in instanced_neighbours:
                        if('special' not in self.node[n].keys()):
                            neighbour_nodes.append(n)
                            temp_neighbours += [j for j in self.neighbors(n) if j not in neighbour_nodes]
                    instanced_neighbours = temp_neighbours
                cluster_found = False
                # sort by descending number of nodes, this will ensure the largest SBU will be found
                # instead of a collection of smaller ones (e.g. multiple aromatic rings).
                clusters = [(cluster.number_of_nodes(), name, cluster) for name,cluster in possible_clusters.items()]
                for count, name, cluster in list(reversed(sorted(clusters))):
                    # ignore if it is impossible to find a clique with the current cluster.
                    if (len(neighbour_nodes)+1) < cluster.number_of_nodes():
                        continue
                    cg = self.correspondence_graph(cluster, toln, general_metal=general_metal, node_subset=neighbour_nodes + [node])
                    cliques = nx.find_cliques(cg)
                    #cliques = approximation.clique.max_clique(cg)
                    for clique in cliques:
                        #print(len(clique), cluster.number_of_nodes())
                        if len(clique) == cluster.number_of_nodes():
                            # found cluster
                            # update the 'hybridization' data
                            for i,j in clique:
                                self.node[i]['special_flag'] = cluster.node[j]['special_flag']
                            cluster_found = True
                            print("Found %s"%(name))
                            store_sbus.setdefault(name, []).append([i for (i,j) in clique])
                            #break

                    #if(cluster_found):
                    #    for n in neighbour_nodes:
                    #        try:
                    #            reference_nodes.pop(reference_nodes.index(n))
                    #        except:
                    #            pass
                    #    break
                    #else:
                    #    # put node back into the pool
                    #    reference_nodes.append(node)
                if not (cluster_found):
                    no_cluster.append(data['element'])
            except KeyError:
                # no recognizable clusters for element
                no_cluster.append(data['element'])
        for j in set(no_cluster):
            print ("No recognizable %s clusters for %i elements %s"%(type.lower(), no_cluster.count(j),  j))

[docs]    def redefine_lattice(self, redefinition, lattice):
        """Redefines the lattice based on the old lattice vectors. This was designed to convert
        non-orthogonal cells to orthogonal boxes, but it could in principle be used to
        convert any cell to any other cell. (As long as the redefined lattice
        are integer multiples of the old vectors)

        """
        #print(redefinition)
        #redefinition = np.array([[1., 0., 0.], [ -2., 2.,0.], [ -1., 3., 2.]])
        # determine how many replicas of the atoms is necessary to produce the supercell.
        vol_change = np.prod(np.diag(redefinition))
        if vol_change > 20:
            print("ERROR: The volume change is %i times greater than the unit cell. "%(vol_change) +
                    "I cannot process structures of this size!")
            sys.exit()

        print("The redefined cell will be %i times larger than the original."%(int(vol_change)))

        # replicate supercell
        sc = (tuple([int(i) for i in np.diag(redefinition)]))
        self.build_supercell(sc, lattice, redefine=redefinition)
        # re-define the cell
        old_cell = np.multiply(self.cell._cell.T, sc).T
        self.cell.set_cell(np.dot(redefinition, self.cell._cell))
        # the node cartesian_coordinates must be shifted by the periodic boundaries.
        for node, data in self.nodes_iter2(data=True):
            coord = data['cartesian_coordinates']
            data['cartesian_coordinates'] = self.in_cell(coord)

        # the bonds which span a periodic boundary will change
        for n1, n2, data in self.edges_iter2(data=True):
            flag = self.compute_bond_image_flag(n1, n2, self.cell)
            data['symflag'] = flag #'.'

        # not sure what this may break, but have to assume this new cell is the 'original'
        self.store_original_size()
    
[docs]    def subgraph(self,nodelist):
        """ Have to override subgraph from networkx. No idea why, but when Graph.subgraph is
        called in nx v >= 2.0, it returns a Graph object, not a MolecularGraph.

        """
        gg = deepcopy(self)
        delete_nodes = []
        for g in gg.nodes():
            if (not g in nodelist):
                delete_nodes.append(g)

        gg.remove_nodes_from(delete_nodes)
        return gg


[docs]    def build_supercell(self, sc, lattice, track_molecule=False, molecule_len=0, redefine=None):
        """Construct a graph with nodes supporting the size of the
        supercell (sc)
        Oh man.. so ugly.
        NB: this replaces and overwrites the original unit cell data
            with a supercell. There may be a better way to do this
            if one needs to keep both the super- and unit cells.
        """
        # preserve indices across molecules.
        unitatomlen = self.original_size
        totatomlen = nx.number_of_nodes(self)
        # keep a numerical index of the nodes.. this is to make sure that the molecules
        # are kept in their positions in the supercell (if replicated)
        unit_node_ids = sorted(self.nodes())
        origincell = np.array([0., 0., 0.])
        cells = list(itertools.product(*[itertools.product(range(j)) for j in sc]))
        maxcell = np.array(sc)
        rem_edges = []
        add_edges = []
        union_graphs = []
        orig_copy = deepcopy(self)
        for count, cell in enumerate(cells):
            newcell = np.array(cell).flatten()
            offset = count * unitatomlen
            mol_offset = count * molecule_len

            cartesian_offset = np.dot(newcell, lattice.cell)
            # Initial setup of new image in the supercell.
            if (count == 0):
                graph_image = self
            else:
                # rename nodes
                graph_image = nx.relabel_nodes(deepcopy(orig_copy), {unit_node_ids[i-1]: offset+unit_node_ids[i-1] for i in range(1, totatomlen+1)})
                graph_image.sorted_edge_dict = self.sorted_edge_dict.copy()
                # rename the edges with the offset associated with this supercell.
                for k,v in list(graph_image.sorted_edge_dict.items()):
                    newkey = (k[0] + offset, k[1] + offset)
                    newval = (v[0] + offset, v[1] + offset)
                    del graph_image.sorted_edge_dict[k]
                    graph_image.sorted_edge_dict.update({newkey:newval})

            # keep track of original index value from the unit cell.
            for i in range(1, totatomlen+1):
                graph_image.node[unit_node_ids[i-1]+offset]['image'] = unit_node_ids[i-1]
            if track_molecule:
                self.molecule_images.append(graph_image.nodes())
                graph_image.molecule_id = orig_copy.molecule_id + mol_offset
            # update cartesian coordinates for each node in the image
            for node in graph_image.nodes():
                data = graph_image.node[node] 
                n_orig = data['image']
                if track_molecule:
                    data['molid'] = graph_image.molecule_id
                data['cartesian_coordinates'] = data['cartesian_coordinates'] + cartesian_offset
                # update all angle and improper terms to the curent image indices. Dihedrals will be done in the edge loop
                # angle check
                try:
                    for (a, c), val in list(data['angles'].items()):
                        aid, cid = offset + a, offset + c
                        e_ba = graph_image[node][aid]
                        e_bc = graph_image[node][cid]
                        ba_symflag = e_ba['symflag']
                        order_ba = graph_image.sorted_edge_dict[(aid, node)]
                        if order_ba != (node, aid) and e_ba['symflag'] != '.':
                            ba_symflag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in e_ba['symflag'][2:]])))
                        bc_symflag = e_bc['symflag']
                        order_bc = graph_image.sorted_edge_dict[(node, cid)]
                        if order_bc != (node, cid) and e_bc['symflag'] != '.':
                            bc_symflag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in e_bc['symflag'][2:]])))
                        os_a = self.img_offset(cells, newcell, maxcell, ba_symflag, redefine) * unitatomlen
                        os_c = self.img_offset(cells, newcell, maxcell, bc_symflag, redefine) * unitatomlen
                        data['angles'].pop((a,c))
                        data['angles'][(a + os_a, c + os_c)] = val

                except KeyError:
                    # no angles for n1
                    pass
                # improper check
                try:
                    for (a, c, d), val in list(data['impropers'].items()):
                        aid, cid, did = offset + a, offset + c, offset + d
                        e_ba = graph_image[node][aid]
                        order_ba = graph_image.sorted_edge_dict[(node, aid)]
                        ba_symflag = e_ba['symflag']
                        e_bc = graph_image[node][cid]
                        order_bc = graph_image.sorted_edge_dict[(node, cid)]
                        bc_symflag = e_bc['symflag']
                        e_bd = graph_image[node][did]
                        order_bd = graph_image.sorted_edge_dict[(node, did)]
                        bd_symflag = e_bd['symflag']
                        if order_ba != (node, aid) and e_ba['symflag'] != '.':
                            ba_symflag = "1_%i%i%i"%(tuple([10 - int(j) for j in e_ba['symflag'][2:]]))
                        if order_bc != (node, cid) and e_bc['symflag'] != '.':
                            bc_symflag = "1_%i%i%i"%(tuple([10 - int(j) for j in e_bc['symflag'][2:]]))
                        if order_bd != (node, did) and e_bd['symflag'] != '.':
                            bd_symflag = "1_%i%i%i"%(tuple([10 - int(j) for j in e_bd['symflag'][2:]]))

                        os_a = self.img_offset(cells, newcell, maxcell, ba_symflag, redefine) * unitatomlen
                        os_c = self.img_offset(cells, newcell, maxcell, bc_symflag, redefine) * unitatomlen
                        os_d = self.img_offset(cells, newcell, maxcell, bd_symflag, redefine) * unitatomlen
                        data['impropers'].pop((a,c,d))
                        data['impropers'][(a + os_a, c + os_c, d + os_d)] = val

                except KeyError:
                    # no impropers for n1
                    pass

            # update nodes and edges to account for bonding to periodic images.
            #unique_translations = {}
            for v1, v2 in graph_image.edges():
                #data = graph_image.edges[(v1,v2)]
                data = graph_image[v1][v2]
                n1,n2 = graph_image.sorted_edge_dict[(v1,v2)]
                # flag boundary crossings, and determine updated nodes.
                # check symmetry flags if they need to be updated,
                n1_data = graph_image.node[n1]
                n2_data = graph_image.node[n2]
                try:
                    n1_orig = n1_data['image']
                    n2_orig = n2_data['image']
                except KeyError:
                    n1_orig = n1
                    n2_orig = n2
                # TODO(pboyd) the data of 'rings' for each node is not updated, do so if needed..
                # update angle, dihedral, improper indices.
                # dihedrals are difficult if the edge spans one of the terminal atoms..

                if (data['symflag'] != '.'):
                    # DEBUGGING
                    unit_repr = np.array([5, 5, 5], dtype=int)
                    translation = tuple(np.array([int(j) for j in data['symflag'][2:]]) - unit_repr)
                    #unique_translations.setdefault(translation,0)
                    #unique_translations[translation] += 1
                    # DEBUGGING
                    os_id = self.img_offset(cells, newcell, maxcell, data['symflag'], redefine, n1)
                    offset_c = os_id * unitatomlen
                    img_n2 = offset_c + n2_orig
                    #if (n1 == 1712):
                    #    print(os_id)
                    #    print(newcell)
                    #    print(redefine)
                    # pain...
                    opposite_flag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in data['symflag'][2:]])))
                    rev_n1_img = self.img_offset(cells, newcell, maxcell, opposite_flag, redefine) * unitatomlen + n1_orig
                    # dihedral check
                    try:
                        for (a, d), val in list(data['dihedrals'].items()):
                            # check to make sure edge between a, n1 is not crossing an image
                            edge_n1_a = orig_copy[n1_orig][a]
                            order_n1_a = graph_image.sorted_edge_dict[(n1, a+offset)]
                            n1a_symflag = edge_n1_a['symflag']

                            edge_n2_d = orig_copy[n2_orig][d]
                            order_n2_d = graph_image.sorted_edge_dict[(n2, d+offset)]
                            n2d_symflag = edge_n2_d['symflag']

                            offset_a = offset
                            if order_n1_a != (n1, a+offset) and edge_n1_a['symflag'] != '.':
                                n1a_symflag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in edge_n1_a['symflag'][2:]])))
                            if (edge_n1_a['symflag'] != '.'):
                                offset_a = self.img_offset(cells, newcell, maxcell, n1a_symflag, redefine) * unitatomlen
                            # check to make sure edge between n2, c is not crossing an image
                            offset_d = offset_c

                            if order_n2_d != (n2, d+offset) and edge_n2_d['symflag'] != '.':
                                n2d_symflag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in edge_n2_d['symflag'][2:]])))
                            if (edge_n2_d['symflag'] != '.'):
                                offset_d = self.img_offset(cells, np.array(cells[os_id]).flatten(), maxcell, n2d_symflag, redefine) * unitatomlen

                            aid, did = offset_a + a, offset_d + d
                            copyover = data['dihedrals'].pop((a,d))
                            data['dihedrals'][(aid, did)] = copyover
                    except KeyError:
                        # no dihedrals here.
                        pass

                    # Update symmetry flag of bond
                    data['symflag'] = self.update_symflag(newcell, data['symflag'], origincell, maxcell)
                    add_edges += [((n1, img_n2),data)]
                    rem_edges += [(n1, n2)]
                else:
                    # dihedral check
                    try:
                        for (a, d), val in list(data['dihedrals'].items()):
                            # check to make sure edge between a, n1 is not crossing an image
                            edge_n1_a = orig_copy[n1_orig][a]
                            order_n1_a = graph_image.sorted_edge_dict[(n1, a+offset)]
                            n1a_symflag = edge_n1_a['symflag']

                            edge_n2_d = orig_copy[n2_orig][d]
                            order_n2_d = graph_image.sorted_edge_dict[(n2, d+offset)]
                            n2d_symflag = edge_n2_d['symflag']

                            offset_a = offset
                            if order_n1_a != (n1, a+offset) and edge_n1_a['symflag'] != '.':
                                n1a_symflag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in edge_n1_a['symflag'][2:]])))
                            if (edge_n1_a['symflag'] != '.'):
                                offset_a = self.img_offset(cells, newcell, maxcell, n1a_symflag, redefine) * unitatomlen
                            # check to make sure edge between n2, c is not crossing an image
                            offset_d = offset

                            if order_n2_d != (n2, d+offset) and edge_n2_d['symflag'] != '.':
                                n2d_symflag = "1_%i%i%i"%(tuple(np.array([10,10,10]) - np.array([int(j) for j in edge_n2_d['symflag'][2:]])))
                            if (edge_n2_d['symflag'] != '.'):
                                offset_d = self.img_offset(cells, newcell, maxcell, n2d_symflag, redefine) * unitatomlen

                            aid, did = offset_a + a, offset_d + d
                            copyover = data['dihedrals'].pop((a,d))
                            data['dihedrals'][(aid, did)] = copyover
                    except KeyError:
                        # no dihedrals here.
                        pass
            if (count > 0):
                union_graphs.append(graph_image)
        for G in union_graphs:
            for node in G.nodes():
                data = G.node[node]
                self.add_node(node, **data)
           #once nodes are added, add edges.
        for G in union_graphs:
            self.sorted_edge_dict.update(G.sorted_edge_dict)
            for v1, v2 in G.edges():
                #d=G.edges[(v1,v2)]
                d=G[v1][v2]
                n1,n2 = G.sorted_edge_dict[(v1,v2)]
                self.add_edge(n1, n2, **d)

        for (n1, n2) in rem_edges:
            self.remove_edge(n1, n2)
            self.sorted_edge_dict.pop((n1,n2))
            self.sorted_edge_dict.pop((n2,n1))
        for (n1, n2), data in add_edges:
            self.add_edge(n1, n2, **data)
            self.sorted_edge_dict.update({(n1,n2):(n1,n2)})
            self.sorted_edge_dict.update({(n2,n1):(n1,n2)})
        #print(list(unique_translations.keys()))
[docs]    def unwrap_node_coordinates(self, cell):
        """Must be done before supercell generation.
        This is a recursive method iterating over all edges.
        The design is totally unpythonic and
        written in about 5 mins.. so be nice (PB)

        """
        supercells = np.array(list(itertools.product((-1, 0, 1), repeat=3)))
        # just use the first node as the unwrapping point..
        # probably a better way to do this to keep most atoms in the unit cell,
        # but I don't think it matters too much.
        nodelist = list(self.nodes())
        n1 = nodelist[0]
        queue = []
        while (nodelist or queue):
            for n2, data in self[n1].items():
                if n2 not in queue and n2 in nodelist:
                    queue.append(n2)
                    coord1 = self.node[n1]['cartesian_coordinates']
                    coord2 = self.node[n2]['cartesian_coordinates']
                    fcoords = np.dot(cell.inverse, coord2) + supercells

                    coords = np.array([np.dot(j, cell.cell) for j in fcoords])

                    dists = distance.cdist([coord1], coords)
                    dists = dists[0].tolist()
                    image = dists.index(min(dists))
                    self.node[n2]['cartesian_coordinates'] += np.dot(supercells[image], cell.cell)
                    data['symflag'] = '.'
            del nodelist[nodelist.index(n1)]
            try:
                n1 = queue[0]
                queue = queue[1:]

            except IndexError:
                pass

[docs]    def store_original_size(self):
        self.original_size = self.number_of_nodes()

[docs]    def __iadd__(self, newgraph):
        self.sorted_edge_dict.update(newgraph.sorted_edge_dict)
        for n, data in newgraph.nodes_iter2(data=True):
            self.add_node(n, **data)
        for n1,n2, data in newgraph.edges_iter2(data=True):
            self.add_edge(n1,n2, **data)
        return self

[docs]    def __or__(self, graph):
        if (len(graph.nodes()) == 1) and len(self.nodes()) == 1:
            return list([0])
        cg = self.correspondence_graph(graph, 0.4)
        cliques = list(nx.find_cliques(cg))
        cliques.sort(key=len)
        return cliques[-1]

[docs]def del_parenth(string):
    return re.sub(r'\([^)]*\)', '' , string)

[docs]def from_CIF(cifname):
    """Reads the structure data from the CIF
    - currently does not read the symmetry of the cell
    - does not unpack the assymetric unit (assumes P1)
    - assumes that the appropriate keys are in the cifobj (no error checking)
    """

    cifobj = CIF()
    cifobj.read(cifname)

    data = cifobj._data
    # obtain atoms and cell
    cell = Cell()
    # add data to molecular graph (to be parsed later..)
    mg = MolecularGraph(name=clean(cifname))
    cellparams = [float(del_parenth(i)) for i in [data['_cell_length_a'],
                                     data['_cell_length_b'],
                                     data['_cell_length_c'],
                                     data['_cell_angle_alpha'],
                                     data['_cell_angle_beta'],
                                     data['_cell_angle_gamma']]]
    cell.set_params(cellparams)

    #add atom nodes
    id = cifobj.block_order.index('atoms')
    atheads = cifobj._headings[id]
    for atom_data in zip(*[data[i] for i in atheads]):
        kwargs = {a:j.strip() for a, j in zip(atheads, atom_data)}
        mg.add_atomic_node(**kwargs)

    # add bond edges, if they exist
    try:
        id = cifobj.block_order.index('bonds')
        bondheads = cifobj._headings[id]
        for bond_data in zip(*[data[i] for i in bondheads]):
            kwargs = {a:j.strip() for a, j in zip(bondheads, bond_data)}
            mg.add_bond_edge(**kwargs)
    except:
        # catch no bonds
        print("No bonds reported in cif file - computing bonding..")
    mg.store_original_size()
    mg.cell = cell
    return cell, mg

[docs]def write_CIF(graph, cell):
    """Currently used for debugging purposes"""
    c = CIF(name="%s.debug"%graph.name)
    # data block
    c.add_data("data", data_=graph.name)
    c.add_data("data", _audit_creation_date=
                        CIF.label(c.get_time()))
    c.add_data("data", _audit_creation_method=
                        CIF.label("Lammps Interface v.%s"%(str(0))))

    # sym block
    c.add_data("sym", _symmetry_space_group_name_H_M=
                        CIF.label("P1"))
    c.add_data("sym", _symmetry_Int_Tables_number=
                        CIF.label("1"))
    c.add_data("sym", _symmetry_cell_setting=
                        CIF.label("triclinic"))

    # sym loop block
    c.add_data("sym_loop", _symmetry_equiv_pos_as_xyz=
                        CIF.label("'x, y, z'"))

    # cell block
    c.add_data("cell", _cell_length_a=CIF.cell_length_a(cell.a))
    c.add_data("cell", _cell_length_b=CIF.cell_length_b(cell.b))
    c.add_data("cell", _cell_length_c=CIF.cell_length_c(cell.c))
    c.add_data("cell", _cell_angle_alpha=CIF.cell_angle_alpha(cell.alpha))
    c.add_data("cell", _cell_angle_beta=CIF.cell_angle_beta(cell.beta))
    c.add_data("cell", _cell_angle_gamma=CIF.cell_angle_gamma(cell.gamma))
    # atom block
    element_counter = {}
    carts = []
    for node, data in graph.nodes_iter2(data=True):
        label = "%s%i"%(data['element'], node)
        c.add_data("atoms", _atom_site_label=
                                CIF.atom_site_label(label))
        c.add_data("atoms", _atom_site_type_symbol=
                                CIF.atom_site_type_symbol(data['element']))
        try:
            c.add_data("atoms", _atom_site_description=
                                CIF.atom_site_description(data['force_field_type']))
        except KeyError:
            c.add_data("atoms", _atom_site_description=
                                CIF.atom_site_description(data['element']))

        coords = data['cartesian_coordinates']
        carts.append(coords)
        fc = np.dot(cell.inverse, coords)
        c.add_data("atoms", _atom_site_fract_x=
                                CIF.atom_site_fract_x(fc[0]))
        c.add_data("atoms", _atom_site_fract_y=
                                CIF.atom_site_fract_y(fc[1]))
        c.add_data("atoms", _atom_site_fract_z=
                                CIF.atom_site_fract_z(fc[2]))
        try:
            c.add_data("atoms", _atom_type_partial_charge=
                                CIF.atom_type_partial_charge(data['charge']))
        except KeyError:
            c.add_data("atoms", _atom_type_partial_charge="0.0")
    # bond block
    # must re-sort them based on bond type (Mat Sudio)
    tosort = [(data['order'], (n1, n2, data)) for n1, n2, data in graph.edges_iter2(data=True)]
    for ord, (n1, n2, data) in sorted(tosort, key=lambda tup: tup[0]):
        try:
            type = CCDC_BOND_ORDERS[data['order']]
        except KeyError:
            # just assume single bond if not in CCDC_BOND_ORDERS dic
            type = "S" 
        dist = data['length']
        sym = data['symflag']


        label1 = "%s%i"%(graph.node[n1]['element'], n1)
        label2 = "%s%i"%(graph.node[n2]['element'], n2)
        c.add_data("bonds", _geom_bond_atom_site_label_1=
                                    CIF.geom_bond_atom_site_label_1(label1))
        c.add_data("bonds", _geom_bond_atom_site_label_2=
                                    CIF.geom_bond_atom_site_label_2(label2))
        c.add_data("bonds", _geom_bond_distance=
                                    CIF.geom_bond_distance(dist))
        c.add_data("bonds", _geom_bond_site_symmetry_2=
                                    CIF.geom_bond_site_symmetry_2(sym))
        c.add_data("bonds", _ccdc_geom_bond_type=
                                    CIF.ccdc_geom_bond_type(type))

    print('Output cif file written to %s.cif'%c.name)
    file = open("%s.cif"%c.name, "w")
    file.writelines(str(c))
    file.close()

[docs]def write_PDB(graph, cell):
    """
    Program to write pdb file from structure graph. Doing this because MS v. 6 doesn't do the CONECT lines
    correctly.

    """
    pdbfile=open("%s.debug.pdb"%(graph.name), "w")

    # remark section
    pdbfile.writelines("%6s %3i PDB file created by Lammps intervace v.%i\n"%("REMARK", 1, 0))
    pdbfile.writelines("%6s %3i Created on %s\n"%("REMARK",2,get_time()))
    # write crystallographic data 
    pdbfile.writelines("%6s%9.3f%9.3f%9.3f%7.2f%7.2f%7.2f %11s%4i\n"%(
        "CRYST1",cell.a, cell.b, cell.c, cell.alpha, cell.beta, cell.gamma,"P1",1))
    conect_string = ""
    for node, d in graph.nodes(data=True):  # nx2.1
        # 55 - 60 - occupancy
        # 61 - 66 temperature factor
        # 77 - 78 element symbol (left justified)
        # 79 - 80 charge on atom.
        try:
            label = "%s"%(d['ciflabel'])
        except:
            label = "%i%s"%(node,d['element'])
        cart = d['cartesian_coordinates']
        pdbfile.writelines("%-6s%5i %4s%1s%3s% 1s%3i%1s   %8.3f%8.3f%8.3f%6.2f%6.2f          %2s%2s\n"
                               %("ATOM",        # 1 - 6: ATOM
                                  node,         # 7 - 11: atom number
                                  label,        # 13 - 16: atom name
                                  " ",          # 17: alternate location id. ????
                                  "MOL",        # 18 - 20: residue name
                                  " ",          # 22: chainId?
                                  2,            # 23 - 26: residue sequence number
                                  " ",          # 27: code for insertion of residues
                                  cart[0],      # 31 - 38: real 8.3 x
                                  cart[1],      # 39 - 46: real 8.3 y
                                  cart[2],      # 47 - 54: real 8.3 z
                                  1.0,          # 55 - 60: occupancy
                                  0.0,          # 61 - 66: temperature factor
                                  d['element'], # 77 - 78: element symbol (left justified)
                                  "  ",         # 79 - 80: charge on atom.
                                  ))
        # list bonded atoms, 4 per line.

        neighbs = sorted(graph.neighbors(node), reverse=True)
        remove = []
        # delete connected atoms that cross periodic boundary, cause vis software for 
        # pdb files suck.
        for idx,n in enumerate(neighbs):
            bonddata = graph.get_edge_data(node,n)
            if bonddata['symflag'] != '.':
                remove.append(idx)
        for i in sorted(remove, reverse=True):
            del neighbs[i]

        num_lines = math.ceil(len(neighbs)/4.)

        for i in range(num_lines):
            try:
                neighbour1 = "%5i"%(neighbs.pop())
            except IndexError:
                neighbour1 = "%5s"%(" ")
            try:
                neighbour2 = "%5i"%(neighbs.pop())
            except IndexError:
                neighbour2 = "%5s"%(" ")
            try:
                neighbour3 = "%5i"%(neighbs.pop())
            except IndexError:
                neighbour3 = "%5s"%(" ")
            try:
                neighbour4 = "%5i"%(neighbs.pop())
            except IndexError:
                neighbour4 = "%5s"%(" ")

            
            conect_string += "%6s%5s%5s%5s%5s%5s\n"%(
              "CONECT",                         # 1 - 6: CONECT
              node,                             # 7 - 11: atom serial number
              neighbour1,                       # 12 - 16: serial number of bonded atom
              neighbour2,                       # 17 - 21: serial number of bonded atom
              neighbour3,                       # 22 - 26: serial number of bonded atom
              neighbour4,                       # 27 - 31: serial number of bonded atom
              )
    pdbfile.writelines("%6s%5i\n"%("TER", graph.number_of_nodes() + 1))
    pdbfile.writelines(conect_string)
    pdbfile.writelines("END")
    pdbfile.close()
    print('Output file written to %s.debug.pdb'%graph.name)

[docs]def write_RASPA_CIF(graph, cell,classifier=0):
    """
    Same as debugging cif write routine
        except _atom_site_label is now equal to _atom_site_description
        b/c RASPA uses _atom_site_label as the type for assigning FF params
    """
    c = CIF(name="%s_raspa"%graph.name)
    # data block
    c.add_data("data", data_=graph.name)
    c.add_data("data", _audit_creation_date=
                        CIF.label(c.get_time()))
    c.add_data("data", _audit_creation_method=
                        CIF.label("Lammps Interface v.%s"%(str(0))))

    # sym block
    c.add_data("sym", _symmetry_space_group_name_H_M=
                        CIF.label("P1"))
    c.add_data("sym", _symmetry_Int_Tables_number=
                        CIF.label("1"))
    c.add_data("sym", _symmetry_cell_setting=
                        CIF.label("triclinic"))

    # sym loop block
    c.add_data("sym_loop", _symmetry_equiv_pos_as_xyz=
                        CIF.label("'x, y, z'"))

    # cell block
    c.add_data("cell", _cell_length_a=CIF.cell_length_a(cell.a))
    c.add_data("cell", _cell_length_b=CIF.cell_length_b(cell.b))
    c.add_data("cell", _cell_length_c=CIF.cell_length_c(cell.c))
    c.add_data("cell", _cell_angle_alpha=CIF.cell_angle_alpha(cell.alpha))
    c.add_data("cell", _cell_angle_beta=CIF.cell_angle_beta(cell.beta))
    c.add_data("cell", _cell_angle_gamma=CIF.cell_angle_gamma(cell.gamma))
    # atom block
    element_counter = {}
    carts = []

    # slight modification to make sure atoms printed out in same order as in data and original cif
    for node, data in sorted(graph.nodes_iter2(data=True)):
        label = "%s%i"%(data['element'], node)

        # sometimes we need to keep the original CIF label in the case
        # of slab geometries where charge symmetry is very reduced
        if(classifier==0):
            c.add_data("atoms", _atom_site_label=
                              CIF.atom_site_label(data['force_field_type']))
        elif(classifier==1):
            c.add_data("atoms", _atom_site_label=
                              CIF.atom_site_label(data['ciflabel']))

        c.add_data("atoms", _atom_site_type_symbol=
                                CIF.atom_site_type_symbol(data['element']))
        #c.add_data("atoms", _atom_site_description=
        #                        CIF.atom_site_description(data['force_field_type']))
        coords = data['cartesian_coordinates']
        carts.append(coords)
        fc = np.dot(cell.inverse, coords)
        c.add_data("atoms", _atom_site_fract_x=
                                CIF.atom_site_fract_x(fc[0]))
        c.add_data("atoms", _atom_site_fract_y=
                                CIF.atom_site_fract_y(fc[1]))
        c.add_data("atoms", _atom_site_fract_z=
                                CIF.atom_site_fract_z(fc[2]))
        c.add_data("atoms", _atom_site_charge=
                                CIF.atom_type_partial_charge(data['charge']))
    # bond block
    # must re-sort them based on bond type (Mat Sudio)
    #tosort = [(data['order'], (n1, n2, data)) for n1, n2, data in graph.edges_iter2(data=True)]
    #for ord, (n1, n2, data) in sorted(tosort, key=lambda tup: tup[0]):
    #    type = CCDC_BOND_ORDERS[data['order']]
    #    dist = data['length']
    #    sym = data['symflag']


    #    label1 = "%s%i"%(graph.node[n1]['element'], n1)
    #    label2 = "%s%i"%(graph.node[n2]['element'], n2)
    #    c.add_data("bonds", _geom_bond_atom_site_label_1=
    #                                CIF.geom_bond_atom_site_label_1(label1))
    #    c.add_data("bonds", _geom_bond_atom_site_label_2=
    #                                CIF.geom_bond_atom_site_label_2(label2))
    #    c.add_data("bonds", _geom_bond_distance=
    #                                CIF.geom_bond_distance(dist))
    #    c.add_data("bonds", _geom_bond_site_symmetry_2=
    #                                CIF.geom_bond_site_symmetry_2(sym))
    #    c.add_data("bonds", _ccdc_geom_bond_type=
    #                                CIF.ccdc_geom_bond_type(type))

    print('Output cif file written to %s.cif'%c.name)
    file = open("%s.cif"%c.name, "w")
    file.writelines(str(c))
    file.close()



[docs]def write_RASPA_sim_files(lammps_sim,classifier=0):
    """
    Write the RASPA pseudo_atoms.def file for this MOF
    All generic adsorbates info automatically included
    Additional framework atoms taken from lammps_interface anaylsis


    classifier = 0 -> UFF atom label is used for pseudo atoms
    classifier = 1 -> cif label is used for pseudo atoms (useful for
                      interface/defect systems where symmetry of charge density
                      is very reduced)

    """

    MOF_PSEUDO_ATOMS = []
    MOF_FF_MIXING = []

    data_list = []
    max_image = 0
    if(classifier == 0):
        for node, data in sorted(lammps_sim.unique_atom_types.items()):
            data['node']=node
            print(key)
            print(data)
            data_list.append(data)

            if(int(data['image']) > max_image):
                max_image = int(data['image'])
            #keys.append(lammps_sim.graph.node[n]['force_field_type'])

    elif(classifier == 1):
        for node, data in sorted(lammps_sim.graph.nodes_iter2(data=True)):
            data['node']=node
            print(node)
            print(data)
            data_list.append(data)

            if(int(data['image']) > max_image):
                max_image = int(data['image'])
            #keys.append(lammps_sim.graph.node[n]['force_field_type'])

    print(max_image)
    for i in range(len(data_list)):

        #ind = 0
        ##for char in key:
        #    # identify first non alphabetic character in string
        #    if((ord(char) >= 65 and ord(char) <= 90) or (ord(char)>=97 and ord(char) <= 122)):
        #        ind += 1
        #    else:
        #        break
        #atmtype_ = key[:ind]
        #print(key,atmtype_)
        #print(atmtype_ in MASS)
        #print(atmtype_ in COVALENT_RADII)
        #print(key in UFF_DATA)

        if(data_list[i]['node']<max_image):
            element_ = data_list[i]['element']
            fftype_ = data_list[i]['force_field_type']

            try:
                if(classifier==0):
                    type_spec_ = data_list[i]['force_field_type']
                elif(classifier==1):
                    type_spec_ = data_list[i]['ciflabel']

                print_ = 'yes'
                as_ = element_
                chem_ = element_
                oxidation_ = '0'
                mass_ = str(MASS[element_])
                charge_ = str(data_list[i]['charge'])
                polarization_ = '0.0'
                B_factor_ = '1.0'
                radii_ = str(COVALENT_RADII[element_])
                connectivity_ = '0'
                anisotropic_ = '0'
                anisotropic_type_ = 'relative'
                tinker_type_ = '0'

                potential_ = 'lennard-jones'
                eps_ = str(UFF_DATA[fftype_][3]*500)
                sig_ = str(UFF_DATA[fftype_][2]*(2**(-1./6.)))
            except:
                print("%s %s not found!\n"%(element_, fftype_))
                continue

            # finally, add strings to list for each RASPA file
            MOF_PSEUDO_ATOMS.append([type_spec_, print_, as_,chem_, oxidation_,mass_, charge_, \
                                     polarization_, B_factor_, radii_, connectivity_, \
                                     anisotropic_, anisotropic_type_, tinker_type_])

            MOF_FF_MIXING.append([type_spec_, potential_, eps_, sig_])

    if(len(MOF_PSEUDO_ATOMS) == 0):
        print("Error! No MOF atoms found. Exiting...")
        sys.exit()

    # Determine final column widths
    col_widths = [0 for i in range(len(MOF_PSEUDO_ATOMS[0]))]
    for i in range(len(MOF_PSEUDO_ATOMS[0])):
        # num columns
        max_width = 0
        for j in range(len(MOF_PSEUDO_ATOMS)):
            # num rows
            if(len(MOF_PSEUDO_ATOMS[j][i]) > max_width):
                max_width = len(MOF_PSEUDO_ATOMS[j][i])
        col_widths[i] = max_width + 2

    col_widths1 = [0 for i in range(len(GENERIC_PSEUDO_ATOMS[0]))]
    for i in range(len(GENERIC_PSEUDO_ATOMS[0])):
        # num columns
        max_width = 0
        for j in range(len(GENERIC_PSEUDO_ATOMS)):
            # num rows
            if(len(GENERIC_PSEUDO_ATOMS[j][i]) > max_width):
                max_width = len(GENERIC_PSEUDO_ATOMS[j][i])
        col_widths1[i] = max_width + 2

    col_widths_final = [max(col_widths[i], col_widths1[i]) for i in range(len(col_widths))]


    # Begin file writing
    f = open('pseudo_atoms.def','w')

    # write header
    num_psuedo_atoms =len(GENERIC_PSEUDO_ATOMS) + len(MOF_PSEUDO_ATOMS)
    GENERIC_PSEUDO_ATOMS_HEADER[1] = str(num_psuedo_atoms)
    for line in GENERIC_PSEUDO_ATOMS_HEADER:
        f.write("".join(word for word in line) +'\n')

    # write this MOFs pseudo atoms
    for i in range(len(MOF_PSEUDO_ATOMS)):
        base_string = ""
        for j in range(len(MOF_PSEUDO_ATOMS[0])):
            buff = "".join(" " for i in range(col_widths_final[j] - len(MOF_PSEUDO_ATOMS[i][j])))

            base_string += MOF_PSEUDO_ATOMS[i][j]
            base_string += buff
        f.write(base_string + '\n')

    # write the generic adsorbates
    for i in range(len(GENERIC_PSEUDO_ATOMS)):
        base_string = ""
        for j in range(len(GENERIC_PSEUDO_ATOMS[0])):
            buff = "".join(" " for i in range(col_widths_final[j] - len(GENERIC_PSEUDO_ATOMS[i][j])))

            base_string += GENERIC_PSEUDO_ATOMS[i][j]
            base_string += buff
        f.write(base_string + '\n')

    f.close()


    # Determine column widths for FF MIXING
    col_widths = [0 for i in range(len(MOF_FF_MIXING[0]))]
    for i in range(len(MOF_FF_MIXING[0])):
        # num columns
        max_width = 0
        for j in range(len(MOF_FF_MIXING)):
            # num rows
            if(len(MOF_FF_MIXING[j][i]) > max_width):
                max_width = len(MOF_FF_MIXING[j][i])
        col_widths[i] = max_width + 2

    col_widths1 = [0 for i in range(len(GENERIC_FF_MIXING[0]))]
    for i in range(len(GENERIC_FF_MIXING[0])):
        # num columns
        max_width = 0
        for j in range(len(GENERIC_FF_MIXING)):
            # num rows
            if(len(GENERIC_FF_MIXING[j][i]) > max_width):
                max_width = len(GENERIC_FF_MIXING[j][i])
        col_widths1[i] = max_width + 2

    col_widths_final = [max(col_widths[i], col_widths1[i]) for i in range(len(col_widths))]

    # write ff mixing file
    f = open('force_field_mixing_rules.def','w')

    # write header
    num_interactions = len(MOF_FF_MIXING) + len(GENERIC_FF_MIXING)
    GENERIC_FF_MIXING_HEADER[5] = str(num_interactions)
    for line in GENERIC_FF_MIXING_HEADER:
        f.write("".join(word for word in line) +'\n')


    # write this MOFs pseudo atoms
    for i in range(len(MOF_FF_MIXING)):
        base_string = ""
        for j in range(len(MOF_FF_MIXING[0])):
            buff = "".join(" " for i in range(col_widths_final[j] - len(MOF_FF_MIXING[i][j])))

            base_string += MOF_FF_MIXING[i][j]
            base_string += buff
        f.write(base_string + '\n')

    # write the generic adsorbates
    for i in range(len(GENERIC_FF_MIXING)):
        base_string = ""
        for j in range(len(GENERIC_FF_MIXING[0])):
            buff = "".join(" " for i in range(col_widths_final[j] - len(GENERIC_FF_MIXING[i][j])))

            base_string += GENERIC_FF_MIXING[i][j]
            base_string += buff
        f.write(base_string + '\n')

    for line in GENERIC_FF_MIXING_FOOTER:
        f.write("".join(word for word in line) +'\n')

    f.close()


[docs]class MDMC_config(object):
    """
    Very sloppy for now but just doing the bare minimum to get this up and running
    for methane in flexible materials
    """

[docs]    def __init__(self, lammps_sim):

        try:
            f = open("MDMC.config","r")
        except:
            self.initialized = False
            print("Warning! No MDMC.config file found.  LAMMPS sim files will not have guest molecule info")
            return

        lines = f.readlines()

        outlines = ""
        for line in lines:
            parsed = line.strip().split()

            if(parsed[0] == "num_framework"):
                outlines += "num_framework\t%d\n"%(nx.number_of_nodes(lammps_sim.graph))
            if(parsed[0] == "type_framework"):
                outlines += "type_framework\t%d\n"%(len(lammps_sim.unique_atom_types.keys()))
            if(parsed[0] == "type_guest"):
                self.type_guest = int(parsed[1])
                outlines += "type_guest\t%d\n"%(self.type_guest)
            if(parsed[0] == "pair_coeff"):
                parsed[1] = str(len(lammps_sim.unique_atom_types.keys()) + self.type_guest)
                for word in parsed:
                    outlines += word + " "
                outlines += "\n"
            if(parsed[0] == "mass_guest"):
                parsed[1] = str(len(lammps_sim.unique_atom_types.keys()) + self.type_guest)
                for word in parsed:
                    outlines += word + " "
                outlines += "\n"

        f.close()

        f = open("MDMC.config", "w")
        f.write(outlines)
        f.close()

        return


[docs]class Cell(object):
[docs]    def __init__(self):
        self._cell = np.identity(3, dtype=np.float64)
        # cell parameters (a, b, c, alpha, beta, gamma)
        self._params = (1., 1., 1., 90., 90., 90.)
        self._inverse = None

    @property
    def volume(self):
        """Calculate cell volume a.bxc."""
        b_cross_c = np.cross(self.cell[1], self.cell[2])
        return np.dot(self.cell[0], b_cross_c)

[docs]    def get_cell(self):
        """Get the 3x3 vector cell representation."""
        return self._cell

[docs]    def get_cell_inverse(self):
        """Get the 3x3 vector cell representation."""
        return self._inverse

[docs]    def mod_to_UC(self, num):
        """
        Retrun any fractional coordinate back into the unit cell
        """
        if(hasattr(num,'__iter__')):
            for i in range(len(num)):
                if(num[i] < 0.0):
                    num[i] = 1+math.fmod(num[i], 1.0)
                else:
                    num[i] = math.fmod(num[i], 1.0)

            return num
        else:
            if(num < 0.0):
                num = math.fmod((num*(-1)), 1.0)
            else:
                num = math.fmod(num, 1.0)

[docs]    def set_cell(self, value):
        """Set cell and params from the cell representation."""
        # Class internally expects an array
        self._cell = np.array(value).reshape((3,3))
        self.__mkparam()
        self.__mklammps()
        # remake cell so a in x, b in xy and c in xyz
        self.__mkcell()
        self._inverse = np.linalg.inv(self.cell.T)

    # Property so that params are updated when cell is set
    cell = property(get_cell, set_cell)

[docs]    def get_params(self):
        """Get the six parameter cell representation as a tuple."""
        return tuple(self._params)

[docs]    def set_params(self, value):
        """Set cell and params from the cell parameters."""
        self._params = value
        self.__mkcell()
        self.__mklammps()
        self._inverse = np.linalg.inv(self.cell.T)

    params = property(get_params, set_params)

[docs]    def minimum_supercell(self, cutoff):
        """Calculate the smallest supercell with a half-cell width cutoff.

        Increment from smallest cell vector to largest. So the supercell
        is not considering the 'unit cell' for each cell dimension.

        """

        a_cross_b = np.cross(self.cell[0], self.cell[1])
        b_cross_c = np.cross(self.cell[1], self.cell[2])
        c_cross_a = np.cross(self.cell[2], self.cell[0])

        #volume = np.dot(self.cell[0], b_cross_c)

        widths = [np.dot(self.cell[0], b_cross_c) / np.linalg.norm(b_cross_c),
                  np.dot(self.cell[1], c_cross_a) / np.linalg.norm(c_cross_a),
                  np.dot(self.cell[2], a_cross_b) / np.linalg.norm(a_cross_b)]

        return tuple(int(math.ceil(2*cutoff/x)) for x in widths)
        #diag = np.diag(self.cell)
        ##print(np.ceil(cutoff/diag*2.))
        #return tuple(int(i) for i in np.ceil(cutoff/diag*2.))

[docs]    def orthogonal_transformation(self):
        """Compute the transformation from the original unit cell to a supercell which
        has 90 degree angles between it's basis vectors. This is somewhat approximate,
        and the angles will not be EXACTLY 90 deg.

        """
        absmat = np.abs(self._inverse.T)
        zero_tol = 0.01
        # round all near - zero values to zero
        absmat[np.where(np.allclose(absmat, 0., atol=zero_tol))] = 0.
        divs = np.array([np.min(absmat[i, np.nonzero(absmat[i])]) for i in range(3)])

        M = np.around(self._inverse.T / divs[:,None])
        MN = M.copy()
        return MN

[docs]    def update_supercell(self, tuple):
        self._cell = np.multiply(self._cell.T, tuple).T
        self.__mkparam()
        self.__mklammps()
        self._inverse = np.linalg.inv(self._cell.T)

    @property
    def minimum_width(self):
        """The shortest perpendicular distance within the cell."""
        a_cross_b = np.cross(self.cell[0], self.cell[1])
        b_cross_c = np.cross(self.cell[1], self.cell[2])
        c_cross_a = np.cross(self.cell[2], self.cell[0])

        volume = np.dot(self.cell[0], b_cross_c)

        return volume / min(np.linalg.norm(b_cross_c), np.linalg.norm(c_cross_a), np.linalg.norm(a_cross_b))

    @property
    def inverse(self):
        """Inverted cell matrix for converting to fractional coordinates."""
        try:
            if self._inverse is None:
                self._inverse = np.linalg.inv(self.cell.T)
        except AttributeError:
            self._inverse = np.linalg.inv(self.cell.T)
        return self._inverse

    @property
    def crystal_system(self):
        """Return the IUCr designation for the crystal system."""
        #FIXME(tdaff): must be aligned with x to work
        if self.alpha == self.beta == self.gamma == 90:
            if self.a == self.b == self.c:
                return 'cubic'
            elif self.a == self.b or self.a == self.c or self.b == self.c:
                return 'tetragonal'
            else:
                return 'orthorhombic'
        elif self.alpha == self.beta == 90:
            if self.a == self.b and self.gamma == 120:
                return 'hexagonal'
            else:
                return 'monoclinic'
        elif self.alpha == self.gamma == 90:
            if self.a == self.c and self.beta == 120:
                return 'hexagonal'
            else:
                return 'monoclinic'
        elif self.beta == self.gamma == 90:
            if self.b == self.c and self.alpha == 120:
                return 'hexagonal'
            else:
                return 'monoclinic'
        elif self.a == self.b == self.c and self.alpha == self.beta == self.gamma:
            return 'trigonal'
        else:
            return 'triclinic'

    def __mkcell(self):
        """Update the cell representation to match the parameters."""
        a_mag, b_mag, c_mag = self.params[:3]
        alpha, beta, gamma = [x * DEG2RAD for x in self.params[3:]]
        a_vec = np.array([a_mag, 0.0, 0.0])
        b_vec = np.array([b_mag * np.cos(gamma), b_mag * np.sin(gamma), 0.0])
        c_x = c_mag * np.cos(beta)
        c_y = c_mag * (np.cos(alpha) - np.cos(gamma) * np.cos(beta)) / np.sin(gamma)
        c_vec = np.array([c_x, c_y, (c_mag**2 - c_x**2 - c_y**2)**0.5])
        self._cell = np.array([a_vec, b_vec, c_vec])

    def __mkparam(self):
        """Update the parameters to match the cell."""
        cell_a = np.sqrt(sum(x**2 for x in self.cell[0]))
        cell_b = np.sqrt(sum(x**2 for x in self.cell[1]))
        cell_c = np.sqrt(sum(x**2 for x in self.cell[2]))
        alpha = np.arccos(sum(self.cell[1, :] * self.cell[2, :]) /
                       (cell_b * cell_c)) * 180 / np.pi
        beta = np.arccos(sum(self.cell[0, :] * self.cell[2, :]) /
                      (cell_a * cell_c)) * 180 / np.pi
        gamma = np.arccos(sum(self.cell[0, :] * self.cell[1, :]) /
                       (cell_a * cell_b)) * 180 / np.pi
        self._params = (cell_a, cell_b, cell_c, alpha, beta, gamma)

    def __mklammps(self):
        a, b, c, alpha, beta, gamma = self._params
        lx = a
        xy = b*math.cos(gamma*DEG2RAD)
        xz = c*math.cos(beta*DEG2RAD)
        ly = math.sqrt(b**2 - xy**2)
        yz = (b*c*math.cos(alpha*DEG2RAD) - xy*xz)/ly
        lz = math.sqrt(c**2 - xz**2 - yz**2)
        self._lammps = (lx, ly, lz, xy, xz, yz)

    @property
    def lx(self):
        return self._lammps[0]
    @property
    def ly(self):
        return self._lammps[1]
    @property
    def lz(self):
        return self._lammps[2]
    @property
    def xy(self):
        return self._lammps[3]
    @property
    def xz(self):
        return self._lammps[4]
    @property
    def yz(self):
        return self._lammps[5]

    @property
    def a(self):
        """Magnitude of cell a vector."""
        return self.params[0]

    @property
    def b(self):
        """Magnitude of cell b vector."""
        return self.params[1]

    @property
    def c(self):
        """Magnitude of cell c vector."""
        return self.params[2]

    @property
    def alpha(self):
        """Cell angle alpha."""
        return self.params[3]

    @property
    def beta(self):
        """Cell angle beta."""
        return self.params[4]

    @property
    def gamma(self):
        """Cell angle gamma."""
        return self.params[5]

[docs]def clean(name):
    name = os.path.split(name)[-1]
    if name.endswith('.cif'):
        name = name[:-4]
    return name