diff --git a/polymerist/genutils/importutils/dependencies.py b/polymerist/genutils/importutils/dependencies.py index 4787afe..a69b4c8 100644 --- a/polymerist/genutils/importutils/dependencies.py +++ b/polymerist/genutils/importutils/dependencies.py @@ -3,7 +3,7 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Callable, ParamSpec, TypeVar +from typing import Callable, Optional, ParamSpec, TypeVar Params = ParamSpec('Params') ReturnType = TypeVar('ReturnType') @@ -14,6 +14,25 @@ from functools import wraps +class MissingPrerequisitePackage(Exception): + '''Raised when a package dependency cannot be found and the user should be alerted with install instructions''' + def __init__(self, + importing_package_name : str, + use_case : str, + install_link : str, + dependency_name : str, + dependency_name_formal : Optional[str]=None + ): + if dependency_name_formal is None: + dependency_name_formal = dependency_name + + message = f''' + {use_case.capitalize()} require(s) {dependency_name_formal}, which was not found in the current environment + Please install `{dependency_name}` by following the installation instructions at {install_link} + Then try importing from "{importing_package_name}" again''' + + super().__init__(message) + def module_installed(module_name : str) -> bool: ''' Check whether a module of the given name is present on the system diff --git a/polymerist/genutils/textual/strsearch.py b/polymerist/genutils/textual/strsearch.py deleted file mode 100644 index 919e797..0000000 --- a/polymerist/genutils/textual/strsearch.py +++ /dev/null @@ -1,41 +0,0 @@ -'''For searching and replacing through strings and text files''' - -__author__ = 'Timotej Bernat' -__email__ = 'timotej.bernat@colorado.edu' - -from typing import Callable, Optional -from pathlib import Path - -from ..fileutils.extensions import FileTypeError - - -def shortest_repeating_substring(string : str) -> str: - '''Return the shortest substring such that the passed string can be written as some number of repeats (including 1) of the substring - Will return the original string if no simpler decomposition exists''' - i = (2*string).find(string, 1, -1) # check if string matches itself in a cycle in non-trivial way (i.e more than just the two repeats) - return string if (i == -1) else string[:i] - -def filter_text_by_condition(in_text_path : Path, condition : Callable[[str], bool], out_text_path : Optional[Path]=None, postfix : str='filtered', inclusive : bool=True, return_filtered_path : bool=False) -> Optional[Path]: - '''Create a copy of a text-based file containing only the lines which match to a given boolean condition - - If no explicit output path is given, will create an output file in the same directory as the source file - with the same name plus "postfix" tacked on. 
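For orientation, here is a minimal sketch (not part of the diff) of the guarded-import pattern that the new `MissingPrerequisitePackage` is designed for, mirroring how this PR uses it in `openfftools/__init__.py` and `polymers/building/__init__.py`; the dependency name and install link below are placeholders, not real requirements:

```python
# Hypothetical subpackage __init__.py guarding an optional dependency
from polymerist.genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage

if not modules_installed('some_optional_dep'):  # placeholder module name
    raise MissingPrerequisitePackage(
        importing_package_name=__spec__.name,             # resolves to the guarded subpackage's name
        use_case='These optional add-ons',
        install_link='https://example.org/install-docs',  # placeholder URL
        dependency_name='some-optional-dep',              # name to hand to the package manager
        dependency_name_formal='the Some Optional Dep toolkit',
    )
```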
Can optionally return the path to the filtered file (else None) - - "Inclusive" kw governs whether to write lines which DO or DON'T meet the condition''' - if out_text_path is None: - out_text_path = in_text_path.with_stem(f'{in_text_path.stem}{"_" if postfix else ""}{postfix}') - - if (out_text_path == in_text_path): - raise PermissionError(f'Attempting to overwrite {in_text_path} with regex filter') # prevent write clash - - if (out_text_path.suffix != in_text_path.suffix): # prevent file type conversion during transfer - raise FileTypeError(f'Input and output file must have same extension (not {in_text_path.suffix} and {out_text_path.suffix})') - - with out_text_path.open('w') as outfile: - with in_text_path.open('r') as infile: # readfile is innermost in case error occurs during file read (caught by handler one level up) - for line in infile: - if (condition(line) == inclusive): # only write lines if (matching AND inclusive) OR (not matching AND exclusive) - outfile.write(line) - - if return_filtered_path: - return out_text_path \ No newline at end of file diff --git a/polymerist/genutils/textual/substrings.py b/polymerist/genutils/textual/substrings.py new file mode 100644 index 0000000..83c6f4f --- /dev/null +++ b/polymerist/genutils/textual/substrings.py @@ -0,0 +1,81 @@ +'''For identifying and concatenating substrings of other strings with unique properties''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + + +def unique_string(string : str, preserve_order : bool=True) -> str: + ''' + Accepts a string and returns another string containing + only the UNIQUE characters in the origin string + + Can specify whether order is important with the "preserve_order" keyword + + Parameters + ---------- + string : str + An arbitrary string on wants the unique characters from + preserve_order : bool, default True + Whether or not to keep the unique characters in the order they are found + For example: + unique_string("balaclava", preserve_order=False) -> "bcavl" + unique_string("balaclava", preserve_order=True) -> "balcv" + + Returns + ------- + uniquified_str : str + Another string containing only the unique characters in "string" + Order depends on the value of the "preserve_order" parameter + ''' + if not preserve_order: + unique_chars = set(string) + else: + unique_chars = [] + for char in string: + if char not in unique_chars: + unique_chars.append(char) + + return ''.join(unique_chars) + +def shortest_repeating_substring(string : str) -> str: + '''Return the shortest substring such that the passed string can be written as some number of repeats (including 1) of the substring + Will return the original string if no simpler decomposition exists''' + i = (2*string).find(string, 1, -1) # check if string matches itself in a cycle in non-trivial way (i.e more than just the two repeats) + return string if (i == -1) else string[:i] + +def repeat_string_to_length(string : str, target_length : int, joiner : str='') -> str: + ''' + Takes a string and repeats it cyclically to produce another string of a given length + The number of times the original string occurs in the new string may be fractional + for example: + >> repeat_string_to_length("CAT", 6) -> "CATCAT" + >> repeat_string_to_length("BACA", 10) -> "BACABACABA" + + Parameters + ---------- + string : str + An arbitrary string to repeat + target_length : int + The length of the final desired string + This does NOT have to be an integer multiple of the length of "string" + E.g. 
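As a quick, purely illustrative check of the two helpers already defined above (the expected values follow directly from their implementations):

```python
# Expected behavior of the new substring helpers defined above
from polymerist.genutils.textual.substrings import unique_string, shortest_repeating_substring

assert unique_string('balaclava', preserve_order=True) == 'balcv'
assert shortest_repeating_substring('ABCABCABC') == 'ABC'   # decomposes into 3 repeats of "ABC"
assert shortest_repeating_substring('ABCA') == 'ABCA'       # no nontrivial decomposition exists
```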
repeat_string_to_length("BACA", 10) -> "BACABACABA" + Nor does it have to be greater than the length of "string" + E.g. repeat_string_to_length("BACA", 3) -> "BAC" + + Returns + ------- + rep_string : str + A new string which has the desired target length and consists of cycles of the initial string + ''' + if not string: + raise ValueError(f'Cannot generate nonempty string from any amount of repeats of the empty string') + if not isinstance(target_length, int): + raise TypeError(f'Only integer target string lengths are allowed, not non-integer type "{type(target_length).__name__}"') + if target_length < 0: + raise IndexError(f'Cannot generate a string of negative length (requested length of {target_length} character(s))') + + num_str_reps, num_extra_chars = divmod(target_length, len(string)) + remainder = (string[:num_extra_chars],) if num_extra_chars else () # empty container avoids extra joiner at end when remainder string is empty + + return joiner.join(num_str_reps*(string,) + remainder) # tuples here are ~2 OOM faster than moral equivalent with lists + \ No newline at end of file diff --git a/polymerist/mdtools/openfftools/__init__.py b/polymerist/mdtools/openfftools/__init__.py index 8f1ab65..f0c5f59 100644 --- a/polymerist/mdtools/openfftools/__init__.py +++ b/polymerist/mdtools/openfftools/__init__.py @@ -4,13 +4,14 @@ __email__ = 'timotej.bernat@colorado.edu' # Subpackage-wide precheck to see if OpenFF is even usable in the first place -from ...genutils.importutils.dependencies import modules_installed +from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage if not modules_installed('openff', 'openff.toolkit'): - raise ModuleNotFoundError( - f''' - OpenFF packages which are required to utilitize {__name__} not found in current environment - Please follow installation instructions at https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html, then retry import - ''' + raise MissingPrerequisitePackage( + importing_package_name=__spec__.name, + use_case='OpenFF addons', + install_link='https://docs.openforcefield.org/projects/toolkit/en/stable/installation.html', + dependency_name='openff-toolkit', + dependency_name_formal='the OpenFF software stack', ) # Import of toplevel OpenFF object registries diff --git a/polymerist/mdtools/openfftools/boxvectors.py b/polymerist/mdtools/openfftools/boxvectors.py index 49ea028..7495d6a 100644 --- a/polymerist/mdtools/openfftools/boxvectors.py +++ b/polymerist/mdtools/openfftools/boxvectors.py @@ -14,7 +14,7 @@ from openff.toolkit import Topology from openff.interchange.components._packmol import _box_vectors_are_in_reduced_form -from .omminter.unitsys import allow_openmm_units, openff_to_openmm +from .unitsys import allow_openmm_units, openff_to_openmm # CUSTOM TYPES FOR CLARITY, ESPECIALLY WITH UNITS diff --git a/polymerist/mdtools/openfftools/omminter/__init__.py b/polymerist/mdtools/openfftools/omminter/__init__.py index db12755..e1522de 100644 --- a/polymerist/mdtools/openfftools/omminter/__init__.py +++ b/polymerist/mdtools/openfftools/omminter/__init__.py @@ -4,9 +4,3 @@ __email__ = 'timotej.bernat@colorado.edu' from .mdobjects import forcefield_flexible, openff_topology_to_openmm -from .unitsys import ( - openmm_to_openff, - openff_to_openmm, - allow_openmm_units, - allow_openff_units, -) \ No newline at end of file diff --git a/polymerist/mdtools/openfftools/omminter/mdobjects.py b/polymerist/mdtools/openfftools/omminter/mdobjects.py index 2ed2be9..7f0af55 100644 --- 
a/polymerist/mdtools/openfftools/omminter/mdobjects.py +++ b/polymerist/mdtools/openfftools/omminter/mdobjects.py @@ -15,7 +15,7 @@ from openmm.app import Topology as OMMTopology from openmm.unit import Quantity -from .unitsys import openff_to_openmm +from ..unitsys import openff_to_openmm from .. import FFDIR from ..boxvectors import box_vectors_flexible, VectorQuantity, BoxVectorsQuantity @@ -39,8 +39,13 @@ def forcefield_flexible(forcefield : Union[ForceField, str, Path]) -> ForceField return ForceField(ff_path) -def openff_topology_to_openmm(offtop : OFFTopology, forcefield : Union[ForceField, str, Path], box_vecs : Optional[Union[VectorQuantity, BoxVectorsQuantity]]=None, - combine_nonbonded_forces : bool=False, add_constrained_forces : bool=False) -> tuple[OMMTopology, System, Quantity]: +def openff_topology_to_openmm( + offtop : OFFTopology, + forcefield : Union[ForceField, str, Path], + box_vecs : Optional[Union[VectorQuantity, BoxVectorsQuantity]]=None, + combine_nonbonded_forces : bool=False, + add_constrained_forces : bool=False + ) -> tuple[OMMTopology, System, Quantity]: '''Converts an OpenFF Topology to an OpenMM Topology, System, and Positions''' if box_vecs is not None: offtop.box_vectors = box_vectors_flexible(box_vecs) diff --git a/polymerist/mdtools/openfftools/solvation/physprops.py b/polymerist/mdtools/openfftools/solvation/physprops.py index 1de70a2..bd9f108 100644 --- a/polymerist/mdtools/openfftools/solvation/physprops.py +++ b/polymerist/mdtools/openfftools/solvation/physprops.py @@ -15,7 +15,7 @@ from openff.units import Quantity as OFFQuantity from ....unitutils.dimensions import is_volume -from ..omminter.unitsys import allow_openff_units, openff_to_openmm +from ..unitsys import allow_openff_units, openff_to_openmm # MASS diff --git a/polymerist/mdtools/openfftools/solvation/solvents/__init__.py b/polymerist/mdtools/openfftools/solvation/solvents/__init__.py index f540da0..a11bf94 100644 --- a/polymerist/mdtools/openfftools/solvation/solvents/__init__.py +++ b/polymerist/mdtools/openfftools/solvation/solvents/__init__.py @@ -10,7 +10,6 @@ from openff.units import unit as offunit from ... import topology -from ... 
import TKREGS def generate_water_TIP3P() -> Molecule: diff --git a/polymerist/mdtools/openfftools/omminter/unitsys.py b/polymerist/mdtools/openfftools/unitsys.py similarity index 100% rename from polymerist/mdtools/openfftools/omminter/unitsys.py rename to polymerist/mdtools/openfftools/unitsys.py diff --git a/polymerist/mdtools/openmmtools/serialization.py b/polymerist/mdtools/openmmtools/serialization.py index 6bad38a..97348a6 100644 --- a/polymerist/mdtools/openmmtools/serialization.py +++ b/polymerist/mdtools/openmmtools/serialization.py @@ -23,6 +23,7 @@ from ...genutils.fileutils.pathutils import assemble_path from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable from ...genutils.fileutils.jsonio.serialize import PathSerializer +from ...molfiles.pdb import SerialAtomLabeller # DEFINING AND STORING SIMULATION PATHS @@ -119,12 +120,18 @@ def serialize_system(sys_path : Path, system : System) -> None: file.write(XmlSerializer.serialize(system)) @allow_string_paths -def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions : Union[NDArray, list[Vec3]], keep_chain_and_res_ids : bool=True, - uniquify_atom_ids : bool=True, num_atom_id_digits : int=2, resname_repl : Optional[dict[str, str]]=None) -> None: +def serialize_openmm_pdb( + pdb_path : Path, + topology : OpenMMTopology, + positions : Union[NDArray, list[Vec3]], + keep_chain_and_res_ids : bool=True, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), + resname_map : Optional[dict[str, str]]=None, + ) -> None: '''Configure and write an Protein DataBank File from an OpenMM Topology and array of positions Provides options to configure atom ID numbering, residue numbering, and residue naming''' - if resname_repl is None: - resname_repl = {} # avoids mutable default + if resname_map is None: + resname_map = {} # avoids mutable default # chain config for chain in topology.chains(): @@ -133,18 +140,14 @@ def serialize_openmm_pdb(pdb_path : Path, topology : OpenMMTopology, positions : # residue config for residue in topology.residues(): residue.id = str(residue.id) # avoids TypeError when specifying keepIds during PDB write - repl_res_name = resname_repl.get(residue.name, None) # lookup current residue name to see if a replacement is called for + repl_res_name = resname_map.get(residue.name, None) # lookup current residue name to see if a replacement is called for if repl_res_name is not None: residue.name = repl_res_name # individual atom config - element_counter = Counter() # for keeping track of the running index of each distinct element - could be used to produce a Hill formula - for atom in topology.atoms(): - symbol = atom.element.symbol - atom_id = element_counter[symbol] - if uniquify_atom_ids: - atom.name = f'{symbol}{atom_id:0{num_atom_id_digits}d}' # extend atom name with ordered integer with specified number of digits (including leading zeros) - element_counter[symbol] += 1 + if atom_labeller: # implicitly, preserves extant atom names if a labeller is not given + for atom in topology.atoms(): + atom.name = atom_labeller.get_atom_label(atom.element.symbol) # file write with pdb_path.open('w') as file: diff --git a/polymerist/molfiles/__init__.py b/polymerist/molfiles/__init__.py new file mode 100644 index 0000000..314438a --- /dev/null +++ b/polymerist/molfiles/__init__.py @@ -0,0 +1,4 @@ +'''Utilities for reading from and writing to various molecular file formats''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' \ No newline at end of file diff --git 
a/polymerist/molfiles/pdb.py b/polymerist/molfiles/pdb.py new file mode 100644 index 0000000..1852423 --- /dev/null +++ b/polymerist/molfiles/pdb.py @@ -0,0 +1,75 @@ +'''PDB file formatting tools''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from dataclasses import dataclass, field +from collections import Counter + + +@dataclass(frozen=True) +class SerialAtomLabeller: + ''' + For assigning unique numbered atom names based on their + order of appearance within a molecule and elemental class + + Useful, for example, in generating unique atom names for a PDB file + + Parameters + ---------- + atom_label_width : int , default 4 + Exact length alloted for any generated atom label + Labels shorter than this are right-padded with spaces, + while labels longer than this are truncated + + Default of 4 is the chosen to be compatible with the PDB specification ("Atom name: lines 13-16, left-justified") + https://www.cgl.ucsf.edu/chimera/docs/UsersGuide/tutorials/pdbintro.html + include_elem_idx : bool, default True + Whether to attach a numerical element-index postfix to atom labels + + E.g. with atom_label_width=4, the fifth carbon in a topology + will be labelled as "C004" with include_elem_idx=True, + while labelled as "C " with include_elem_idx=False, + default_elem_idx : int, default 0 + Starting index for each element category + By default, is 0-indexed; MUST BE POSITIVE + ''' + atom_label_width : int = 4 + include_elem_idx : bool = True + default_elem_idx : int = 0 + + element_counter : Counter = field(init=False, default_factory=Counter) + + def __post_init__(self) -> None: + '''Check ranges on input values''' + if self.atom_label_width < 0: + raise ValueError(f'Must provide a non-negative number of index digits to include (provided {self.atom_label_width})') + + if self.default_elem_idx < 0: + raise ValueError(f'Must provide a non-negative starting index for element indices (provided {self.default_elem_idx})') + + def get_atom_label(self, elem_symbol : str) -> str: + ''' + Obtain a numbered atom label for an atom based on its element, + updating the underlying element context in the process + ''' + if not isinstance(elem_symbol, str): + raise TypeError(f'Must pass symbol of atom\'s element as str (not type {type(elem_symbol).__name__})') + + if elem_symbol not in self.element_counter: # initialize first occurence to starting value + self.element_counter[elem_symbol] = self.default_elem_idx + + atom_idx_label : str = '' + if self.include_elem_idx: + atom_idx = self.element_counter[elem_symbol] + num_idx_digits = max(self.atom_label_width - len(elem_symbol), 0) # number of symbols left over for an atom index + atom_idx_label = f'{atom_idx:0{num_idx_digits}d}' + + atom_name = f'{elem_symbol}{atom_idx_label}' + atom_name = atom_name.ljust(self.atom_label_width, ' ')[:self.atom_label_width] # pad with spaces if too short, or truncate if too long + assert(len(atom_name) <= self.atom_label_width) # perfunctory check to make sure things are working as expected + + self.element_counter[elem_symbol] += 1 # update tally with addition of new occurence of a particular element + + return atom_name + \ No newline at end of file diff --git a/polymerist/polymers/building.py b/polymerist/polymers/building.py deleted file mode 100644 index e7898c3..0000000 --- a/polymerist/polymers/building.py +++ /dev/null @@ -1,111 +0,0 @@ -'''Utilities for building new polymer structures; currently limited to linear polymers and PDB save format''' - -__author__ = 'Timotej Bernat' 
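An illustrative sketch (not part of the diff) of how the new `SerialAtomLabeller` behaves with its defaults, and where it plugs into the reworked `serialize_openmm_pdb()` signature shown earlier:

```python
from polymerist.molfiles.pdb import SerialAtomLabeller

labeller = SerialAtomLabeller()  # defaults: atom_label_width=4, include_elem_idx=True, default_elem_idx=0
print([labeller.get_atom_label(sym) for sym in ('C', 'C', 'H', 'Cl', 'C')])
# -> ['C000', 'C001', 'H000', 'Cl00', 'C002']  (per-element counters, padded/truncated to 4 characters)

# The labeller replaces the old uniquify_atom_ids/num_atom_id_digits flags; passing atom_labeller=None
# now preserves any existing atom names:
# serialize_openmm_pdb(pdb_path, topology, positions, atom_labeller=SerialAtomLabeller(), resname_map={'RES': 'Pol'})
```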
-__email__ = 'timotej.bernat@colorado.edu' - -import logging -LOGGER = logging.getLogger(__name__) - -import warnings -with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings - warnings.filterwarnings('ignore', category=DeprecationWarning) - import mbuild as mb - from mbuild import Compound - from mbuild.lib.recipes.polymer import Polymer as MBPolymer - -from pathlib import Path -from rdkit import Chem - -from .exceptions import MorphologyError -from .estimation import estimate_chain_len_linear -from ..polymers.monomers.repr import MonomerGroup -from ..polymers.monomers.specification import SANITIZE_AS_KEKULE - -from ..genutils.decorators.functional import allow_string_paths -from ..rdutils.bonding.portlib import get_linker_ids -from ..rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports -from ..mdtools.openmmtools.serialization import serialize_openmm_pdb - - -# CONVERSION -def mbmol_from_mono_rdmol(rdmol : Chem.Mol) -> tuple[Compound, list[int]]: - '''Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports''' - linker_ids = [i for i in get_linker_ids(rdmol)] # record indices of ports - MUST unpack generator for mbuild compatibility - - # create port-free version of molecule which RDKit can embed without errors - prot_mol = hydrogenate_rdmol_ports(rdmol, in_place=False) - # prot_mol = saturate_ports(rdmol) # TOSELF : custom, port-based saturation methods are not yet ready for deployment - yield issues in RDKit representation under-the-hood - Chem.SanitizeMol(prot_mol, sanitizeOps=SANITIZE_AS_KEKULE) # ensure Mol is valid (avoids implicitValence issues) - mb_compound = mb.conversion.from_rdkit(prot_mol) # native from_rdkit() method actually appears to preserve atom ordering - - return mb_compound, linker_ids - -@allow_string_paths -def mbmol_to_openmm_pdb(pdb_path : Path, mbmol : Compound, num_atom_digits : int=2, res_repl : dict[str, str]=None) -> None: - '''Save an MBuild Compound into an OpenMM-compatible PDB file''' - if res_repl is None: # avoid mutable default - res_repl = {'RES' : 'Pol'} - - traj = mbmol.to_trajectory() # first convert to MDTraj representation (much more infor-rich format) - omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory - - serialize_openmm_pdb( - pdb_path, - topology=omm_top, - positions=omm_pos, - uniquify_atom_ids=True, - num_atom_id_digits=num_atom_digits, - resname_repl=res_repl - ) - - -# LINEAR POLYMER BUILDING -def build_linear_polymer(monomers : MonomerGroup, DOP : int, sequence : str='A', add_Hs : bool=False, energy_minimize : bool=False) -> MBPolymer: - '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) - and a degree of polymerization (i.e. 
chain length in number of monomers)) and returns an mbuild Polymer object''' - # 0) VERIFY THAT CHAIN ACTUAL CAN DEFINE LINEAR POLYMER - if not monomers.is_linear: - raise MorphologyError('Linear polymer building does not support non-linear monomer input') - - if monomers.has_valid_linear_term_orient: - term_orient = monomers.term_orient - LOGGER.info(f'Using pre-defined terminal group orientation {term_orient}') - else: - term_orient = { - resname : orient - for (resname, rdmol), orient in zip(monomers.iter_rdmols(term_only=True), ['head', 'tail']) # will raise StopIteration if fewer - } - LOGGER.warning(f'No valid terminal monomer orientations defined; autogenerated orientations "{term_orient}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') - - # 1) ADD MIDDLE MONOMERS TO CHAIN - chain = MBPolymer() - for (resname, middle_monomer), sequence_key in zip(monomers.iter_rdmols(term_only=False), sequence): # zip with sequence limits number of middle monomers to length of block sequence - LOGGER.info(f'Registering middle monomer {resname} (block identifier "{sequence_key}")') - mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer) - chain.add_monomer(compound=mb_monomer, indices=linker_ids) - - # 2) ADD TERMINAL MONOMERS TO CHAIN - term_iters = { # need to convert to iterators to allow for generator-like advancement (required for term group selection to behave as expected) - resname : iter(rdmol_list) # made necessary by annoying list-bound structure of current substructure spec - for resname, rdmol_list in monomers.rdmols(term_only=True).items() - } - for resname, head_or_tail in term_orient.items(): - LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') - term_monomer = next(term_iters[resname]) - mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer) - chain.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation - - # 3) ASSEMBLE AND RETURN CHAIN - n_atoms = estimate_chain_len_linear(monomers, DOP) - LOGGER.info(f'Assembling linear polymer chain with {DOP} monomers ({n_atoms} atoms)') - chain.build(DOP - 2, sequence=sequence, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) - for atom in chain.particles(): - atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) - LOGGER.info(f'Successfully assembled linear polymer chain with {DOP} monomers ({n_atoms} atoms)') - - if energy_minimize: - LOGGER.info('Energy-minimizing chain to find more stabile conformer') - chain.energy_minimize() - LOGGER.info('Energy minimization completed') - - return chain \ No newline at end of file diff --git a/polymerist/polymers/building/__init__.py b/polymerist/polymers/building/__init__.py new file mode 100644 index 0000000..dfe3e3d --- /dev/null +++ b/polymerist/polymers/building/__init__.py @@ -0,0 +1,21 @@ +''' +Tools for building polymer conformers out of monomer SMARTS fragments +Currently restricted to building linear homopolymers and periodic block copolymers +''' + +from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage + +if not modules_installed('mbuild'): + raise MissingPrerequisitePackage( + importing_package_name=__spec__.name, + use_case='Polymer building', + install_link='https://mbuild.mosdef.org/en/stable/getting_started/installation/installation.html', + 
dependency_name='mbuild', + dependency_name_formal='mBuild', + ) + +from .linear import build_linear_polymer +from .mbconvert import ( + mbmol_from_mono_rdmol, mbmol_to_rdmol, + mbmol_to_openmm_pdb, mbmol_to_rdkit_pdb, +) \ No newline at end of file diff --git a/polymerist/polymers/building/linear.py b/polymerist/polymers/building/linear.py new file mode 100644 index 0000000..dab50f4 --- /dev/null +++ b/polymerist/polymers/building/linear.py @@ -0,0 +1,88 @@ +'''For generating linear polymer structure from monomer, sequence, and chain length information''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import logging +LOGGER = logging.getLogger(__name__) + +import warnings +with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings + warnings.filterwarnings('ignore', category=DeprecationWarning) + from mbuild.lib.recipes.polymer import Polymer as MBPolymer + +from .mbconvert import mbmol_from_mono_rdmol +from .sequencing import LinearCopolymerSequencer +from ..exceptions import MorphologyError +from ..monomers.repr import MonomerGroup +from ..estimation import estimate_n_atoms_linear +from ...genutils.textual.substrings import unique_string + + +def build_linear_polymer( + monomers : MonomerGroup, + n_monomers : int, + sequence : str='A', + minimize_sequence : bool=True, + allow_partial_sequences : bool=False, + add_Hs : bool=False, + energy_minimize : bool=False, + ) -> MBPolymer: + '''Accepts a dict of monomer residue names and SMARTS (as one might find in a monomer JSON) + and a degree of polymerization (i.e. chain length in number of monomers)) and returns an mbuild Polymer object''' + # 1) DETERMINE NUMBER OF SEQUENCE REPEATS NEEDED TO MEET TARGET NUMBER OF MONOMER UNITS (IF POSSIBLE) - DEV: consider making a separate function + end_groups = monomers.linear_end_groups() # cache end groups so they dont need to be recalculated when registering end groups + end_group_names = [resname for (resname, _) in end_groups.values()] + + sequencer = LinearCopolymerSequencer( + sequence_kernel=sequence, + n_repeat_units=n_monomers, + n_repeat_units_terminal=len(end_groups) + ) + if minimize_sequence: + sequencer.reduce() # identify minimal subsequences + + sequence_compliant, n_seq_repeats = sequencer.procrustean_alignment(allow_partial_sequences=allow_partial_sequences) + LOGGER.info( + f'Target chain length achievable with {sequencer.describe_tally()}, ' \ + f'namely with the sequence {sequencer.describe_order(end_group_names=end_group_names)}' + ) + sequence_unique = unique_string(sequence_compliant, preserve_order=True) # only register a new monomer for each appearance of a new, unique symbol in the sequence + + # 2) REGISTERING MONOMERS TO BE USED FOR CHAIN ASSEMBLY + polymer = MBPolymer() + monomers_selected = MonomerGroup() # used to track and estimate sized of the monomers being used for building + + ## 2A) ADD MIDDLE MONOMERS TO CHAIN + for symbol, (resname, middle_monomer) in zip(sequence_unique, monomers.iter_rdmols(term_only=False)): # zip with sequence limits number of middle monomers to length of block sequence + LOGGER.info(f'Registering middle monomer {resname} (block identifier "{symbol}")') + mb_monomer, linker_ids = mbmol_from_mono_rdmol(middle_monomer, resname=resname) + polymer.add_monomer(compound=mb_monomer, indices=linker_ids) + monomers_selected.monomers[resname] = monomers.monomers[resname] + + ## 2B) ADD TERMINAL MONOMERS TO CHAIN + for head_or_tail, (resname, term_monomer) in 
end_groups.items(): + LOGGER.info(f'Registering terminal monomer {resname} (orientation "{head_or_tail}")') + mb_monomer, linker_ids = mbmol_from_mono_rdmol(term_monomer, resname=resname) + polymer.add_end_groups(compound=mb_monomer, index=linker_ids.pop(), label=head_or_tail, duplicate=False) # use single linker ID and provided head-tail orientation + monomers_selected.monomers[resname] = monomers.monomers[resname] + + # 3) ASSEMBLE AND RETURN CHAIN + if not monomers_selected.is_linear: # verify the selected monomers actually define a linear polymer + raise MorphologyError('Linear polymer building does not support non-linear monomer input') + + n_atoms_est = estimate_n_atoms_linear(monomers_selected, n_monomers) # TODO: create new MonomerGroup with ONLY the registered monomers to guarantee accuracy + LOGGER.info(f'Assembling linear {n_monomers}-mer chain (estimated {n_atoms_est} atoms)') + + polymer.build(n_seq_repeats, sequence=sequence_compliant, add_hydrogens=add_Hs) # "-2" is to account for term groups (in mbuild, "n" is the number of times to replicate just the middle monomers) + for atom in polymer.particles(): + atom.charge = 0.0 # initialize all atoms as being uncharged (gets rid of pesky blocks of warnings) + LOGGER.info(f'Successfully assembled linear {n_monomers}-mer chain (exactly {polymer.n_particles} atoms)') + + # 4) OPTIONALLY, PERFORM FINAL UFF ENERGY MINIMIZATION + if energy_minimize: + LOGGER.info('Energy-minimizing chain to find more stable conformer') + polymer.energy_minimize() + LOGGER.info('Energy minimization completed') + + return polymer \ No newline at end of file diff --git a/polymerist/polymers/building/mbconvert.py b/polymerist/polymers/building/mbconvert.py new file mode 100644 index 0000000..61656d9 --- /dev/null +++ b/polymerist/polymers/building/mbconvert.py @@ -0,0 +1,135 @@ +''' +Enhanced conversions to and from mbuild Compound objects which +preserve more molecular information than the utilities provided by +''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from ...genutils.importutils.dependencies import modules_installed, MissingPrerequisitePackage + +if not modules_installed('openbabel'): + raise MissingPrerequisitePackage( + importing_package_name=__spec__.name, + use_case='Translation between chemical representations of polymers', + install_link='https://libraries.io/conda/openbabel', + dependency_name='openbabel', + dependency_name_formal='the OpenBabel chemical toolbox', + ) + +from typing import Optional +from pathlib import Path + +import warnings +with warnings.catch_warnings(record=True): # suppress numerous and irritating mbuild deprecation warnings + warnings.filterwarnings('ignore', category=DeprecationWarning) + from mbuild import Compound + from mbuild.conversion import from_rdkit + +from rdkit import Chem + +from ...genutils.decorators.functional import allow_string_paths, allow_pathlib_paths +from ..monomers.specification import SANITIZE_AS_KEKULE +from ...molfiles.pdb import SerialAtomLabeller +from ...rdutils.bonding.portlib import get_linker_ids +from ...rdutils.bonding.substitution import saturate_ports, hydrogenate_rdmol_ports +from ...mdtools.openmmtools.serialization import serialize_openmm_pdb + + +# Conversion from other formats to Compound +def mbmol_from_mono_rdmol(rdmol : Chem.Mol, resname : Optional[str]=None) -> tuple[Compound, list[int]]: + ''' + Accepts a monomer-spec-compliant SMARTS string and returns an mbuild Compound and a list of the indices of atom ports + If "resname" is 
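A hedged usage sketch of the reworked linear builder; the keyword names come from the new `build_linear_polymer()` signature and `PE_FRAGMENTS` from the fragment catalogue added later in this diff, but whether this particular monomer group resolves suitable end groups is an assumption of the example:

```python
from polymerist.polymers.monomers.repr import MonomerGroup
from polymerist.polymers.monomers.fragments import PE_FRAGMENTS
from polymerist.polymers.building import build_linear_polymer, mbmol_to_openmm_pdb

monomers = MonomerGroup(monomers=PE_FRAGMENTS)  # 'PE1' is a capped (terminal) fragment, 'PE2' a two-port middle fragment
chain = build_linear_polymer(
    monomers,
    n_monomers=10,             # total repeat units, end groups included
    sequence='A',              # homopolymer kernel; e.g. 'AB' would alternate two middle monomers
    minimize_sequence=True,    # reduce the kernel (e.g. 'ABAB' -> 'AB') before sequence alignment
    allow_partial_sequences=False,
)
mbmol_to_openmm_pdb('PE_10mer.pdb', chain)  # hypothetical output path
```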
provided, will assign that name to the mBuild Compound returned + ''' + linker_ids = [i for i in get_linker_ids(rdmol)] # record indices of ports - MUST unpack generator for mbuild compatibility + + # create port-free version of molecule which RDKit can embed without errors + prot_mol = hydrogenate_rdmol_ports(rdmol, in_place=False) + # prot_mol = saturate_ports(rdmol) # TOSELF : custom, port-based saturation methods are not yet ready for deployment - yield issues in RDKit representation under-the-hood + Chem.SanitizeMol(prot_mol, sanitizeOps=SANITIZE_AS_KEKULE) # ensure Mol is valid (avoids implicitValence issues) + + mb_compound = from_rdkit(prot_mol) # native from_rdkit() method actually appears to preserve atom ordering + if resname is not None: + mb_compound.name = resname + + return mb_compound, linker_ids + +# Conversion from Compound to other formats +_DEFAULT_RESNAME_MAP : dict[str, str] = { # module-wide config for default PDB residue name replacements for polymers + 'RES' : 'Pol', +} + +def mbmol_to_rdmol( # TODO: deduplify PDB atom name and residue numbering code against serialize_openmm_pdb() + mbmol : Compound, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), + resname_map : Optional[dict[str, str]]=None + ) -> Chem.Mol: + '''Convert an mBuild Compound into an RDKit Mol, with correct atom coordinates and PDB residue info''' + if resname_map is None: + resname_map = _DEFAULT_RESNAME_MAP + + rdmol = mbmol.to_rdkit() + conformer = Chem.Conformer() + conformer.Set3D(True) + + atom_id : int = 0 + for resnum, mb_monomer in enumerate(mbmol.children, start=1): + resname = resname_map.get(mb_monomer.name, mb_monomer.name[:3]) # if no remapping is found, just take first 3 chars + # NOTE: the order of monomers and atoms within those monomers were added in the same order as iterated over here... + #... 
so the atom indices **SHOULD** be in the correct order (hate that this even might be uncertain) + for mbatom in mb_monomer.particles(): + conformer.SetAtomPosition(atom_id, 10*mbatom.pos.astype(float)) # conveert from nm to angstrom + + # set PDB residue info if monomer hierarchy is present + if mbatom != mb_monomer: # for Compounds with a flat hierarchy, the children and particles of children will coincide + pdb_info = Chem.AtomPDBResidueInfo( + atomName=4*' ' if not atom_labeller else atom_labeller.get_atom_label(mbatom.element.symbol), + residueName=resname, + residueNumber=resnum, + chainId='1', + isHeteroAtom=True, + ) + rdmol.GetAtomWithIdx(atom_id).SetPDBResidueInfo(pdb_info) + atom_id += 1 # TODO: this is an awful waay of keeping track of atom indices, see if there's a more secure way to do this + conf_id = rdmol.AddConformer(conformer) # NOTE: recording this to self-document return values (this is intentionally not used) + + return rdmol + +# Serialization of Compounds to files +@allow_pathlib_paths +def mbmol_to_rdkit_pdb( + pdb_path : str, + mbmol : Compound, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), + resname_map : Optional[dict[str, str]]=None, + ) -> None: + '''Save an MBuild Compound into an RDKit-formatted PDB file''' + Chem.MolToPDBFile( + mbmol_to_rdmol(mbmol, atom_labeller=atom_labeller, resname_map=resname_map), + pdb_path, + ) + +@allow_string_paths +def mbmol_to_openmm_pdb( + pdb_path : Path, + mbmol : Compound, + atom_labeller : Optional[SerialAtomLabeller]=SerialAtomLabeller(), + resname_map : Optional[dict[str, str]]=None, + ) -> None: + '''Save an MBuild Compound into an OpenMM-formatted PDB file''' + if resname_map is None: # avoid mutable default + resname_map = _DEFAULT_RESNAME_MAP + + # NOTE: converting through MDTraj first before going to OpenMM preserves much + # of the necessary chemical info that is discarded when converting through other formats + traj = mbmol.to_trajectory(residues=[residue.name for residue in mbmol.children]) # extract names of repeat units + omm_top, omm_pos = traj.top.to_openmm(), traj.openmm_positions(0) # extract OpenMM representations of trajectory + + serialize_openmm_pdb( + pdb_path, + topology=omm_top, + positions=omm_pos, + atom_labeller=atom_labeller, + resname_map=resname_map, + ) \ No newline at end of file diff --git a/polymerist/polymers/building/sequencing.py b/polymerist/polymers/building/sequencing.py new file mode 100644 index 0000000..cd52c69 --- /dev/null +++ b/polymerist/polymers/building/sequencing.py @@ -0,0 +1,199 @@ +'''For generating and manipulating sequences of symbols which correspond to monomer ordering in blocky and random copolymers''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import logging +LOGGER = logging.getLogger(__name__) + +from typing import Iterable, Optional +from dataclasses import dataclass, field, asdict + +from ...genutils.textual.substrings import shortest_repeating_substring, repeat_string_to_length +from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable +from ..exceptions import EndGroupDominatedChain, InsufficientChainLength, EmptyBlockSequence, PartialBlockSequence + + +@make_jsonifiable +@dataclass +class LinearCopolymerSequencer: + ''' + For encapsulating information about the sequence of repeat units in a periodic, linear copolymer + Also covers, as trivial special cases, homopolymers and alternating copolymers + + Parameters + ---------- + sequence_kernel : str + A sequence indicating a periodic ordering of 
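A brief, hedged sketch of sending an assembled chain through the new `mbmol_to_rdmol()` while inspecting the per-residue PDB metadata it attaches; the `chain` object is assumed to come from a `build_linear_polymer()` call like the one sketched earlier:

```python
from polymerist.polymers.building.mbconvert import mbmol_to_rdmol

rdmol = mbmol_to_rdmol(chain, resname_map={'RES': 'Pol'})  # 'chain' built previously with build_linear_polymer()
info = rdmol.GetAtomWithIdx(0).GetPDBResidueInfo()         # populated when the Compound has monomer children
if info is not None:
    print(info.GetResidueName(), info.GetResidueNumber(), info.GetName())
```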
monomers in a linear polymer block (e.g. "A", "ABAC", etc)
+        Each unique symbol in the sequence corresponds to a distinct monomer in the block
+    n_repeat_units : int
+        The desired total number of monomers (including terminal monomers) in a polymer chain
+    n_repeat_units_terminal : int
+        The number of terminal monomers ("end groups") which are to be included in the chain
+        in addition to the middle monomers described by "sequence_kernel"
+
+    Raises
+    ------
+    EmptyBlockSequence
+        The sequence provided is empty (can't be used to define a nonzero-length chain)
+    EndGroupDominatedChain
+        The number of terminal monomers exceeds the total number of monomers
+    '''
+    sequence_kernel : str
+    n_repeat_units : int
+    n_repeat_units_terminal : int = 0
+
+    # Attribute checks and modifications
+    def __post_init__(self) -> None:
+        if not self.sequence_kernel:
+            raise EmptyBlockSequence('Must provide non-empty sequence kernel to yield a valid (co)polymer sequence')
+
+        if self.n_repeat_units_middle < 0:
+            raise EndGroupDominatedChain(
+                f'Number of terminal monomers exceeds requested chain length; ({self.n_repeat_units}-mer ' \
+                f'chain can\'t possibly contain {self.n_repeat_units_terminal} terminal monomers)'
+            )
+
+    def copy(self) -> 'LinearCopolymerSequencer':
+        '''Returns another equivalent instance of the current sequence info more efficiently than a complete deepcopy'''
+        return self.__class__(**asdict(self))
+
+    def reduce(self) -> None:
+        '''
+        Determines if there is a shorter repeating subsequence making up the current sequence kernel
+        If there is, adjusts the sequence kernel to that minimal sequence; does nothing otherwise
+
+        Reduction is idempotent, and guarantees that the smallest possible kernel is used when sequencing
+        '''
+        minimal_subsequence = shortest_repeating_substring(self.sequence_kernel)
+        kernel_period = self.block_size // len(minimal_subsequence) # account for any periodic shortening WITHIN the kernel
+
+        if kernel_period == 1:
+            LOGGER.info(f'Sequence kernel "{self.sequence_kernel}" is already fully reduced; no changes made')
+            return
+        else:
+            LOGGER.info(
+                f'Sequence kernel "{self.sequence_kernel}" can be further decomposed as {kernel_period}*"{minimal_subsequence}"; ' \
+                f'Setting kernel to minimal subsequence "{minimal_subsequence}"'
+            )
+            self.sequence_kernel = minimal_subsequence
+
+    def reduced(self) -> 'LinearCopolymerSequencer':
+        '''Return a sequence-reduced version of the current sequence info'''
+        clone = self.copy()
+        clone.reduce()
+
+        return clone
+
+    # Properties derived from sequence kernel and target chain lengths
+
+    @property
+    def n_repeat_units_middle(self) -> int:
+        '''Number of middle (i.e. 
non-terminal) repeat units''' + return self.n_repeat_units - self.n_repeat_units_terminal + + # Whole sequence periods + @property + def block_size(self) -> int: + '''Number of repeat units units in one whole iteration of the kernel block''' + return len(self.sequence_kernel) + period = block_size + + @property + def n_full_periods(self) -> int: + ''' + Largest number of complete repetitions of the sequence kernel which, when taken + together, contain no more repeats units than the specified number of middle units + ''' + return self.n_repeat_units_middle // self.block_size + + # Partial sequence residues + @property + def n_residual_repeat_units(self) -> int: + ''' + Difference between number of middle repeat units and units which + would occur in maximal full periods of the kernel + + By construction, is no greater than the block size and is + identically zero exactly when a whole number of kernel repeats + ''' + return self.n_repeat_units_middle % self.block_size + n_residual_symbols = n_res = n_residual_repeat_units + + @property + def has_residual(self) -> bool: + ''' + Whether or not the target number of middle repeat units + can be attained by a whole number of kernel repeats + ''' + return bool(self.n_residual_repeat_units) + + @property + def sequence_residual(self) -> str: + '''Partial repeat of the kernel sequence needed to attain the speficied number of middle units''' + return self.sequence_kernel[:self.n_residual_repeat_units] + residual = sequence_residual + + ## PROCRUSTEAN sequence alignment + def procrustean_alignment(self, allow_partial_sequences : bool=False) -> tuple[str, int]: + ''' + PROCRUSTEAN: Periodic Recurrence Of Cyclic Repeat Unit Sequences, Truncated to an Exact and Arbitrary Number + Stretches or truncates the sequence kernel to achieve a target sequence length, cycling through the kernel's period as many times as needed + + Algorithm produces a sequence string "P" and number of repeats "r" which, taken together, satisfy the following: + - The number of units in r repeats of P plus the number of terminal monomers is precisely equal to the target number of monomers + - The units in P cycle through the units in S, in the order they appear in S + - The number of times S is cycled through in P is always a rational multiple of the length of S + If no satisfiable sequence-count pair can be found, raises an appropriate informative exception + ''' + if not self.has_residual: # the case where the target length happens to consist of a whole-number of repeats of the kernel + if self.n_full_periods < 1: # NOTE: if it were up to me, this would be < 0 to allow dimers, but mBuild has forced my hand + raise InsufficientChainLength( + f'{self.n_repeat_units}-monomer chain cannot accomodate both {self.n_repeat_units_terminal} end groups AND at least 1 middle monomer sequence' + ) + sequence_procrustean = self.sequence_kernel + n_seq_repeats = self.n_full_periods + else: + if not allow_partial_sequences: + raise PartialBlockSequence( + f'Partial polymer block sequence required to meet target number of monomers ("{self.residual}" prefix of sequence "{self.sequence_kernel}");\n' \ + 'If this is acceptable, set "allow_partial_sequences=True" and try calling build routine again' + ) + sequence_procrustean = repeat_string_to_length(self.sequence_kernel, target_length=self.n_repeat_units_middle, joiner='') + n_seq_repeats = 1 # just repeat the entire mixed-fraction length sequence (no full sequence repeats to exploit) + + return sequence_procrustean, n_seq_repeats + + def 
describe_order(self, end_group_names : Optional[Iterable[str]]=None, default_end_group_name : str='END-GROUP') -> str: + '''Descriptive string presenting a condensed view of the order of repeat units in the final sequence''' + # Assign names for end groups + if end_group_names is None: + end_group_names = [f'[{default_end_group_name}]']*self.n_repeat_units_terminal + else: + end_group_names = [f'[{end_group_name}]' for end_group_name in end_group_names] # unpack into list and enforce correct number of names + if (num_names_provided := len(end_group_names)) != self.n_repeat_units_terminal: # DEV: consider supporting filling in missing names with default in future + raise IndexError(f'Defined sequence info with {self.n_repeat_units_terminal} end groups, but only provided names for {num_names_provided}') + + # Insert middle omnomer parts as necessary + sequence_middle = [] + if self.n_full_periods != 0: ## Whole sequence strings + sequence_middle.append(f'{self.n_full_periods}*[{self.sequence_kernel}]') + if self.has_residual: ## Partial sequence strings + sequence_middle.append(f'[{self.residual}]') + + # Abut with correct amount of end group indicators + sequence_parts = end_group_names[:] + sequence_parts[1:-1] = sequence_middle + + return ' + '.join(sequence_parts) + + def describe_tally(self) -> str: + '''Descriptive string indicating how all parts of the overall sequence contribute to the target number of repeat units''' + desc_seq_counts_parts = [] + if self.n_full_periods != 0: ## Whole sequence strings + desc_seq_counts_parts.append(f'{self.n_full_periods} whole {self.block_size}-sequence repeat(s)') + if self.has_residual: ## Partial sequence strings + desc_seq_counts_parts.append(f'a partial {self.n_residual_repeat_units}/{self.block_size} sequence repeat') + + return ' and '.join(desc_seq_counts_parts) + \ No newline at end of file diff --git a/polymerist/polymers/estimation.py b/polymerist/polymers/estimation.py index fa56799..b075888 100644 --- a/polymerist/polymers/estimation.py +++ b/polymerist/polymers/estimation.py @@ -4,18 +4,16 @@ __email__ = 'timotej.bernat@colorado.edu' import numpy as np -from rdkit import Chem -from .exceptions import InsufficientChainLengthError -from ..genutils.iteration import iter_len +from .exceptions import InsufficientChainLength from ..polymers.monomers.repr import MonomerGroup from ..rdutils.bonding.portlib import get_num_ports -def estimate_chain_len_linear(monomers : MonomerGroup, DOP : int) -> int: +def estimate_n_atoms_linear(monomers : MonomerGroup, n_monomers : int) -> int: '''Given a set of monomers and the desired degree of polymerization, estimate the length of the resulting chain !NOTE! : As-implemented, only works for linear homopolymers and block copolymers with equal an distribution of monomers''' - # TOSELF : omitted logging for now, as it gets repeated on EVERY cycle in when called estimate_DOP_lower + # TOSELF : omitted logging for now, as it gets repeated on EVERY cycle in when called estimate_n_monomers_supremum() num_mono = monomers.n_monomers mono_term = np.zeros(num_mono, dtype=bool) # terminality of each monomer (i.e. whether or not it is a term group) mono_multip = np.zeros(num_mono, dtype=int) # multiplicity of each polymer (i.e. 
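A worked example of the PROCRUSTEAN alignment described above (not part of the diff; the numbers follow directly from the class definition):

```python
from polymerist.polymers.building.sequencing import LinearCopolymerSequencer

seq = LinearCopolymerSequencer(sequence_kernel='AB', n_repeat_units=12, n_repeat_units_terminal=2)
print(seq.procrustean_alignment())
# -> ('AB', 5): 2 end groups + 5 whole repeats of "AB" = 12 total repeat units

seq = LinearCopolymerSequencer(sequence_kernel='AB', n_repeat_units=13, n_repeat_units_terminal=2)
print(seq.procrustean_alignment(allow_partial_sequences=True))
# -> ('ABABABABABA', 1): the 11 middle units are built as a single, partial cycle of the kernel
# with allow_partial_sequences=False, this second call raises PartialBlockSequence instead
```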
how many times is occurs in a chain) @@ -32,27 +30,30 @@ def estimate_chain_len_linear(monomers : MonomerGroup, DOP : int) -> int: num_term = sum(mono_term) num_mid = num_mono - num_term # assumed that all monomers are either terminal or not - mono_multip[~mono_term] = (DOP - num_term) / num_mid # naive assumption that all middle monomers contribute rest of chain equally (for homopolymers, this is always true) + mono_multip[~mono_term] = (n_monomers - num_term) / num_mid # naive assumption that all middle monomers contribute rest of chain equally (for homopolymers, this is always true) N = mono_contrib @ mono_multip # compute dot product to yield final count return N -def estimate_DOP_lower(monomers : MonomerGroup, max_chain_len : int, min_DOP : int=3) -> int: - '''Returns the largest DOP for a set of monomers which yields a chain no longer than the specified chain length''' - base_chain_len = estimate_chain_len_linear(monomers, min_DOP) - if base_chain_len > max_chain_len: # pre-check when optimization is impossible - raise InsufficientChainLengthError(f'Even shortest possible chain (DOP={min_DOP}, N={base_chain_len}) is longer than the specified max length of {max_chain_len} atoms') - - DOP = min_DOP - while estimate_chain_len_linear(monomers, DOP + 1) < max_chain_len: # check if adding 1 more monomer keeps the length below the threshold - DOP += 1 - - return DOP - -def estimate_DOP_upper(monomers : MonomerGroup, min_chain_len : int, min_DOP : int=3) -> int: # NOTE : as currently defined, this also subsumes the case when the estimate and calculated length are exactly equal - '''Returns the smallest DOP for a set of monomers which yields a chain no shorter than the specified chain length''' - return estimate_DOP_lower(monomers, min_chain_len, min_DOP=min_DOP) + 1 # by definition, this is just 1 monomer longer than the lower bound - -estimate_DOP_infimum = estimate_DOP_upper # more descriptive aliases to alleviate confusion (originals kept in for backwards compatibility) -estimate_DOP_supremum = estimate_DOP_lower # more descriptive aliases to alleviate confusion (originals kept in for backwards compatibility) \ No newline at end of file +def estimate_n_monomers_infimum(monomers : MonomerGroup, n_atoms_max : int, n_monomers_min : int=3) -> int: + ''' + For a given collection of monomer fragments, returns the largest number of monomers which guarantees that + a polymer chain made up of those monomers will have no more than the specified maximum number of atoms + ''' + n_atoms_base = estimate_n_atoms_linear(monomers, n_monomers_min) + if n_atoms_base > n_atoms_max: # pre-check when optimization is impossible + raise InsufficientChainLength(f'Even shortest possible chain ({n_monomers_min} monomers, with {n_atoms_base} atoms) is longer than the specified max length of {n_atoms_max} atoms') + + n_monomers = n_monomers_min + while estimate_n_atoms_linear(monomers, n_monomers + 1) < n_atoms_max: # check if adding 1 more monomer keeps the length below the threshold + n_monomers += 1 + + return n_monomers + +def estimate_n_monomers_supremum(monomers : MonomerGroup, n_atoms_min : int, n_monomers_min : int=3) -> int: # NOTE : as currently defined, this also subsumes the case when the estimate and calculated length are exactly equal + ''' + For a given collection of monomer fragments, returns the smallest number of monomers which guarantees that + a polymer chain made up of those monomers will have no fewer than the specified minimum number of atoms + ''' + return estimate_n_monomers_infimum(monomers, 
n_atoms_min, n_monomers_min=n_monomers_min) + 1 # by definition, any more monomers than the infimum guarantees the chain will surpass a given number of atoms
\ No newline at end of file
diff --git a/polymerist/polymers/exceptions.py b/polymerist/polymers/exceptions.py
index e7b5625..e0060f9 100644
--- a/polymerist/polymers/exceptions.py
+++ b/polymerist/polymers/exceptions.py
@@ -3,51 +3,44 @@
 __author__ = 'Timotej Bernat'
 __email__ = 'timotej.bernat@colorado.edu'
 
-
-class SubstructMatchFailedError(Exception):
-    '''Raised when molecule graph isomorphism match does not form a cover'''
-    pass
-
-class InsufficientChainLengthError(Exception):
+# CHAIN LENGTH AND SHAPE ERRORS
+class InsufficientChainLength(Exception):
     '''Raised when the polymer molecule being built is too short'''
     pass
 
-class ExcessiveChainLengthError(Exception):
+class ExcessiveChainLength(Exception):
     '''Raised when the polymer molecule being built is too long'''
     pass
 
+class EndGroupDominatedChain(Exception):
+    '''Raised to indicate that a chain contains more end groups than its total number of monomers could possibly allow'''
+
 class MorphologyError(Exception):
     '''Raised when a polymer does not have the morphology (i.e. crosslinking, molecular weight, etc) an application expects'''
     pass
 
-class AlreadySolvatedError(Exception):
-    '''Raised when attempting to add solvent to a molecule which already has solvent'''
+# COPOLYMER SEQUENCING ERRORS
+class EmptyBlockSequence(Exception):
+    '''Raised when a trivial copolymer block sequence (i.e. the empty string "") is provided when not expected'''
     pass
 
-class ChargeMismatchError(Exception):
-    '''Raised when attempting to merge two objects which disagree on their charging status'''
+class PartialBlockSequence(Exception):
+    '''Raised when a non-whole number of copolymer blocks is needed to reach a target chain length (and is not allowed)'''
     pass
 
-class NoSimulationsFoundError(Exception):
-    '''Raised when attempting to load a simulation for a managed molecule when none are present'''
+# POLYMERIZATION MISINFORMATION ERRORS
+class AlreadySolvated(Exception):
+    '''Raised when attempting to add solvent to a molecule which already has solvent'''
     pass
 
-class MissingStructureData(Exception):
-    '''Raised when a managed molecule has no associated structure file (e.g. PDB, SDF, etc.)'''
+class ChargeMismatch(Exception):
+    '''Raised when attempting to merge two objects which disagree on their charging status'''
     pass
 
-class MissingForceFieldData(Exception):
-    '''Raised when a forcefield is unspecified for a Simulation or Interchange'''
+class MissingStructureData(Exception):
+    '''Raised when a managed molecule has no associated structure file (e.g. 
PDB, SDF, etc.)''' pass class MissingMonomerData(Exception): - '''Raised when no monomer information is found for a Polymer''' - pass - -class MissingMonomerDataUncharged(MissingMonomerData): - '''Raised when no monomer information WITHOUT library charges is found for a Polymer''' - pass - -class MissingMonomerDataCharged(MissingMonomerData): - '''Raised when no monomer information WITH library charges is found for a Polymer''' + '''Raised when no monomer fragment information is found for a Polymer''' pass diff --git a/polymerist/polymers/monographs.py b/polymerist/polymers/monographs.py index 28f6fcd..8c9b591 100644 --- a/polymerist/polymers/monographs.py +++ b/polymerist/polymers/monographs.py @@ -38,7 +38,6 @@ def get_flavor_dict_at_node_index(self, node_idx : int) -> Optional[dict[int, in def num_monomers(self) -> int: '''Number of monomer units represented in the current polymer''' return self.number_of_nodes() - DOP = num_monomers @property def is_unbranched(self) -> bool: diff --git a/polymerist/polymers/monomers/__init__.py b/polymerist/polymers/monomers/__init__.py index 176a2ff..7e50795 100644 --- a/polymerist/polymers/monomers/__init__.py +++ b/polymerist/polymers/monomers/__init__.py @@ -3,4 +3,9 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from .repr import MonomerGroup # make monomer representation available at the module level \ No newline at end of file +from .repr import MonomerGroup # make monomer representation available at the module level +from .fragments import ( + PE_FRAGMENTS, + MPD_TMC_FRAGMENTS, + PEG_PLGA_FRAGMENTS, +) \ No newline at end of file diff --git a/polymerist/polymers/monomers/fragments.py b/polymerist/polymers/monomers/fragments.py new file mode 100644 index 0000000..a734e80 --- /dev/null +++ b/polymerist/polymers/monomers/fragments.py @@ -0,0 +1,32 @@ +'''Catalogue of monomer fragment templates for some common polymer systems''' + +PE_FRAGMENTS : dict[str, list[str]] = { + # PE (polyethylene) + 'PE1': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:6])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:4])-[#1D1+0:5]'], + 'PE2': ['[*:1]-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:7])-[#1D1+0:8])(-[#1D1+0:5])-[#1D1+0:6]'], +} + +MPD_TMC_FRAGMENTS : dict[str, list[str]] = { # fragments for common polyamide membrane + # MPD (m-phenyl diamine) + 'MPD-1': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:11])-[#6D3+0:5](-[#1D1+0:12])=[#6D3+0:6](-[#1D1+0:13])-[#6D3+0:7](-[#1D1+0:14])=[#6D3+0:8]-1-[#7D3+0:9](-[#1D1+0:15])-[#1D1+0:16])-[#1D1+0:10]'], + 'MPD-2': ['[*:1]-[#7D3+0:2](-[#6D3+0:3]1=[#6D3+0:4](-[#1D1+0:12])-[#6D3+0:5](-[#1D1+0:13])=[#6D3+0:6](-[#1D1+0:14])-[#6D3+0:7](-[#1D1+0:15])=[#6D3+0:8]-1-[#7D3+0:9](-[*:10])-[#1D1+0:16])-[#1D1+0:11]'], + # TMC (trimesoyl chloride) + 'TMC-1': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[#17D1+0:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], + 'TMC-2': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[#17D1+0:15]'], + 'TMC-3': ['[#6D3+0:1]1(-[#1D1+0:16])=[#6D3+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])-[#6D3+0:6](-[#1D1+0:17])=[#6D3+0:7](-[#6D3+0:8](=[#8D1+0:9])-[*:10])-[#6D3+0:11](-[#1D1+0:18])=[#6D3+0:12]-1-[#6D3+0:13](=[#8D1+0:14])-[*:15]'], +} + +PEG_PLGA_FRAGMENTS : dict[str, list[str]] = { # fragments for all variants of 
PEG-PLGA-like polymers + # PEG (ethylene glycol) + 'PEG-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[*:4])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7])-[#1D1+0:5]'], + 'PEG-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#8D2+0:5]-[#1D1+0:10])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + 'PEG-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[*:5])(-[#1D1+0:8])-[#1D1+0:9])(-[#1D1+0:6])-[#1D1+0:7]'], + # PLA (lactic acid) + 'PLA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D4+0:3](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:4](=[#8D1+0:5])-[*:6])-[#1D1+0:8])-[#1D1+0:7]'], + 'PLA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[#8D2+0:7]-[#1D1+0:12])-[#1D1+0:8]'], + 'PLA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D4+0:4](-[#1D1+0:9])(-[#1D1+0:10])-[#1D1+0:11])(-[#6D3+0:5](=[#8D1+0:6])-[*:7])-[#1D1+0:8]'], + # PGA (glycolic acid) + 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], + 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], +} \ No newline at end of file diff --git a/polymerist/polymers/monomers/repr.py b/polymerist/polymers/monomers/repr.py index f8ebf62..0aa0d8d 100644 --- a/polymerist/polymers/monomers/repr.py +++ b/polymerist/polymers/monomers/repr.py @@ -3,40 +3,100 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from typing import Generator, Optional, TypeAlias, Union +import logging +LOGGER = logging.getLogger(__name__) + +from typing import Generator, Optional, Iterable, Union from dataclasses import dataclass, field +from itertools import cycle from collections import defaultdict + from rdkit import Chem -from rdkit.Chem.rdchem import Mol from ...genutils.iteration import iter_len from ...genutils.fileutils.jsonio.jsonify import make_jsonifiable -from ...rdutils.bonding.portlib import get_num_ports +from ...smileslib.primitives import Smarts, is_valid_SMARTS +from ...rdutils.bonding.portlib import get_num_ports -ResidueSmarts : TypeAlias = dict[str, list[str]] # monomer SMARTS strings keyed by residue name # MAIN REPRESENTATION CLASS @make_jsonifiable @dataclass class MonomerGroup: '''Stores collections of residue-labelled monomer SMARTS''' - monomers : ResidueSmarts = field(default_factory=dict) - term_orient : dict[str, str] = field(default_factory=dict) - + monomers : dict[str, Union[Smarts, list[Smarts]]] = field(default_factory=dict) + term_orient : dict[str, str] = field(default_factory=dict) # keys are either "head" or "tail", values are the names of residues in "monomers" + + # MONOMER ADDITION AND VALIDATION + def __post_init__(self) -> None: + # Encase bare SMARTS into lists and check that all monomer SMARTS are valid + monomers_init = self.monomers # store inputted values + self.monomers = {} # clear monomers and re-add one-at-a-time + for resname, smarts in monomers_init.items(): + self.add_monomer(resname, smarts) + # DEV: opted to forgo term_orient check for now, as modifying this violates the read-only data model aimed for here + + def _add_monomer(self, resname : str, smarts : Smarts) -> None: + '''Add a new monomer to the templates already stored within, subject to validation checks''' + if not isinstance(smarts, str): + raise TypeError(f'Values of monomers must be either SMARTS strings or lists of SMARTS strings, not "{type(smarts).__name__}"') + # 
DEV: include check for empty string? (technically still a valid SMARTS string, but a pretty pathological one at that) + if not is_valid_SMARTS(smarts): + raise ValueError(f'Provided invalid monomer SMARTS string for {resname}: "{smarts}"') + # DEV: decide whether or not SMILES expansion and spec-compliance should be enforced here or shunted off to the user + + if resname in self.monomers: + existing_resgroup = self.monomers[resname] + if isinstance(existing_resgroup, list) and (smarts not in existing_resgroup): + LOGGER.info(f'Extending existing residue category "{resname}" with SMARTS {smarts}') + self.monomers[resname].append(smarts) + else: + LOGGER.info(f'Creating new residue category "{resname}", containing singular SMARTS ["{smarts}"]') + self.monomers[resname] = [smarts] + + def _add_monomers(self, resname : str, smarts_container : Iterable[Smarts]) -> None: + '''Add new monomers to the templates already stored within, subject to validation checks, from an iterable container''' + for smarts in smarts_container: + self._add_monomer(resname, smarts) + + def add_monomer(self, resname : str, smarts : Union[Smarts, Iterable[Smarts]]) -> None: + '''Register new monomers, either directly from SMARTS or from a container of SMARTS''' + if isinstance(smarts, Iterable) and not isinstance(smarts, str): # don't want to insert one character at a time if a string is in fact provided + self._add_monomers(resname, smarts) + else: + self._add_monomer(resname, smarts) # assume any other inputs are singular values or strings + + # DUNDER "MAGIC" METHODS + def __getitem__(self, resname : str) -> list[Smarts]: + '''Convenience method to access .monomers directly from instance''' + return self.monomers[resname] # NOTE: deliberately avoid "get()" here to propagate KeyError + # BUG: user can directly append to the returned value to forgo monomer validation checks; + # this is not unique to __getitem__ but rather a consequence of thinly-wrapping builtin types + + def __setitem__(self, resname : str, smarts : Smarts) -> None: + '''Convenience method to access .monomers directly from instance''' + self.add_monomer(resname, smarts) + + def __hash__(self) -> int: + '''Hash based on monomer SMARTS and terminal orientation in a canonical order''' + # TOSELF: this is far from bulletproof, viz. 
canonicalization of SMARTS, list value sorting, etc + return hash(f'{sorted(self.monomers.items())}{sorted(self.term_orient.items())}') + + # ATTRIBUTE PROPERTIES AND ALIASES @staticmethod - def is_terminal(monomer : Mol) -> bool: + def is_terminal(monomer : Chem.Mol) -> bool: '''Determine whether or not a monomer is terminal''' return get_num_ports(monomer) == 1 - - # ATTRIBUTE PROPERTIES AND ALIASES + @property - def SMARTS(self) -> ResidueSmarts: + def SMARTS(self) -> dict[str, list[Smarts]]: '''Alias of legacy "monomers" attribute''' return self.monomers # alias of legacy name for convenience - def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, Mol], None, None]: + # ITERATION OVER STORED MOLECULE FRAGMENTS + def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, Chem.Mol], None, None]: ''' Generate (residue name, RDKit Mol) pairs of all monomers present Simplifies iteration over internal lists of monomer Mols @@ -52,7 +112,7 @@ def iter_rdmols(self, term_only : Optional[bool]=None) -> Generator[tuple[str, M if (term_only is None) or (MonomerGroup.is_terminal(monomer) == term_only): yield (resname, monomer) - def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Mol]]: + def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Chem.Mol]]: ''' Returns dict of RDKit Mol lists keyed by residue name @@ -67,44 +127,66 @@ def rdmols(self, term_only : Optional[bool]=None) -> dict[str, list[Mol]]: return rdmol_dict + def contributions(self, term_only : Optional[bool]=None) -> dict[str, list[int]]: + '''Returns dict of the number of real (i.e. non-linker) atoms in each residue list''' + return { + resname : [mol.GetNumAtoms() - get_num_ports(mol) for mol in mol_list] + for resname, mol_list in self.rdmols(term_only=term_only).items() + } + @property def n_monomers(self) -> int: - '''Returns number of present monomers - Multiple monomers with the same residue name are considered distinct''' + '''Returns number of present monomers; multiple monomers under the same residue name are considered distinct''' return iter_len(self.iter_rdmols(term_only=None)) - # VALIDATION AND PROPERTY CHECKS - @property - def _is_valid(self) -> bool: - '''Check that types and formatting are correct''' - for resname, SMARTS_list in self.monomers.items(): - if not (isinstance(resname, str) and isinstance(SMARTS_list, list)): - return False - else: - return True # valid only if none of the SMARTS lists fail + # END GROUP DETERMINATION + def linear_end_groups(self) -> dict[str, tuple[str, Chem.Mol]]: + ''' + Returns head-and-tail end group residue names and Mol objects as defined by term_orient - @property - def has_valid_linear_term_orient(self) -> bool: - '''Check whether terminal group orientations are sufficient to define a linear polymer''' - return ( - bool(self.term_orient) # check that: 1) the term group orientations are non-empty / non-null... - and all(resname in self.monomers for resname in self.term_orient.keys()) # 2) all term group keys match a present monomer... 
- and sorted(self.term_orient.values()) == ['head', 'tail'] # 3) orientation labels are only "head" and "tail" (in either order) - ) + If term_orient is undefined, will automatically take the first + <= 2 terminal groups available to be the end groups + + Returns + ------- + end_groups : dict[str, tuple[str, Chem.Mol]] + A dict whose keys are any of {'head', 'tail'} and whose + values are 2-tuples of residue names and Mols for the corresponding monomer + ''' + if self.term_orient and set(self.term_orient.keys()) == {'head', 'tail'}: + LOGGER.info(f'Using user-defined terminal group orientation {self.term_orient}') + monomer_iters = { + resname : cycle(smarts_list) + for resname, smarts_list in self.rdmols(term_only=True).items() + } # cycle handles degenerate end group case correctly + + return { + head_or_tail : (resname, next(monomer_iters[resname])) # will raise KeyError if any of the resnames are not present + for head_or_tail, resname in self.term_orient.items() + } + else: + term_orient_auto : dict[str, Smarts] = {} + end_groups_auto : dict[str, tuple[str, Chem.Mol]] = {} + for head_or_tail, (resname, rdmol) in zip(['head', 'tail'], self.iter_rdmols(term_only=True)): # zip will bottom out early if fewer than 2 terminal monomers are present + term_orient_auto[head_or_tail] = resname # populate purely for logging + end_groups_auto[head_or_tail] = (resname, rdmol) + LOGGER.warning(f'No valid terminal monomer orientations defined, auto-assigned orientations "{term_orient_auto}"; USER SHOULD VERIFY THIS YIELDS A CHEMICALLY-VALID POLYMER!') + + return end_groups_auto - # COMPOSITION AND I/O METHODS + # COMPOSITION METHODS def __add__(self, other : 'MonomerGroup') -> 'MonomerGroup': '''Content-aware method of merging multiple sets of monomer info via the addition operator''' cls = self.__class__ if not isinstance(other, cls): raise NotImplementedError(f'Can only merge {cls.__name__} with another {cls.__name__}, not object of type {type(other)}') - + # TODO: figure out how to handle combination of term group orientation gracefully (ignoring for now) return MonomerGroup(monomers={**self.monomers, **other.monomers}) __radd__ = __add__ # support reverse addition # CHEMICAL INFORMATION - def unique(self, cap_group : Union[str, Mol]=Chem.MolFromSmarts('[H]-[*]')) -> 'MonomerGroup': + def unique(self, cap_group : Union[Smarts, Chem.Mol]=Chem.MolFromSmarts('[H]-[*]')) -> 'MonomerGroup': '''Return a MonomerGroup containing only the unique monomers present, given a particular port saturating group (by default just a hydrogen)''' raise NotImplementedError # unique_mono = set() diff --git a/polymerist/polymers/monomers/specification.py b/polymerist/polymers/monomers/specification.py index 0731105..8561dd4 100644 --- a/polymerist/polymers/monomers/specification.py +++ b/polymerist/polymers/monomers/specification.py @@ -98,6 +98,7 @@ def compliant_atom_query_from_re_match(match : re.Match) -> str: # CONVERSION METHODS +## DEV: add function to check whether a given SMARTS is COMPLETELY spec-compliant def compliant_mol_SMARTS(smarts : str) -> str: '''Convert generic SMARTS string into a spec-compliant one''' # initial checks diff --git a/polymerist/smileslib/__init__.py b/polymerist/smileslib/__init__.py index 647f88b..649e161 100644 --- a/polymerist/smileslib/__init__.py +++ b/polymerist/smileslib/__init__.py @@ -3,4 +3,4 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' -from .primitives import is_valid_SMILES, is_valid_SMARTS \ No newline at end of file +from .primitives import 
is_valid_SMILES, is_valid_SMARTS, Smiles, Smarts \ No newline at end of file diff --git a/polymerist/smileslib/primitives.py b/polymerist/smileslib/primitives.py index 0134f3c..91c87a6 100644 --- a/polymerist/smileslib/primitives.py +++ b/polymerist/smileslib/primitives.py @@ -3,10 +3,24 @@ __author__ = 'Timotej Bernat' __email__ = 'timotej.bernat@colorado.edu' +from typing import TypeAlias + from rdkit import Chem from rdkit.Chem.rdchem import BondType +# VALIDATION +Smiles : TypeAlias = str # purely for improving self-documentation of functions, no benefit to static type-checkers +Smarts : TypeAlias = str # purely for improving self-documentation of functions, no benefit to static type-checkers + +def is_valid_SMARTS(smarts : Smarts) -> bool: + '''Check if SMARTS string is valid (according to RDKit)''' + return (Chem.MolFromSmarts(smarts) is not None) + +def is_valid_SMILES(smiles : Smiles) -> bool: + '''Check if SMILES string is valid (according to RDKit)''' + return (Chem.MolFromSmiles(smiles) is not None) + # BOND PRIMITIVES AND RELATED OBJECTS BOND_PRIMITIVES = '~-=#$:' BOND_PRIMITIVES_FOR_REGEX = r'[~\-=#$:]' # any of the SMARTS bond primitive chars, with a space to differentiate single-bond hyphen for the regex range char @@ -36,13 +50,3 @@ bonds_by_order[order] = prim_str rdbonds_by_type[bondtype] = rd_bond rdbonds_by_order[order] = rd_bond - - -# VALIDATION -def is_valid_SMARTS(smarts : str) -> bool: - '''Check if SMARTS string is valid (according to RDKit)''' - return (Chem.MolFromSmarts(smarts) is not None) - -def is_valid_SMILES(smiles : str) -> bool: - '''Check if SMARTS string is valid (according to RDKit)''' - return (Chem.MolFromSmiles(smiles) is not None) \ No newline at end of file diff --git a/polymerist/tests/genutils/textual/test_substrings.py b/polymerist/tests/genutils/textual/test_substrings.py new file mode 100644 index 0000000..1130812 --- /dev/null +++ b/polymerist/tests/genutils/textual/test_substrings.py @@ -0,0 +1,66 @@ +'''Unit tests for `substrings` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest + +from polymerist.genutils.textual.substrings import unique_string, shortest_repeating_substring, repeat_string_to_length + + +@pytest.mark.parametrize('string', ['Lorem', 'ipsum', 'dolor', 'sit', 'amet', 'consectetur', 'adipiscing', 'elit']) +def test_unique_str_unordered(string : str) -> None: + '''Test that unique characters are correctly identified WITHOUT respect to order''' + assert set(unique_string(string, preserve_order=False)) == set(string) + +@pytest.mark.parametrize('string, expected_output', + [ + ('aaaaa', 'a'), + ('BABAB', 'BA'), + ('balaclava', 'balcv'), + ('catamaran', 'catmrn'), + ('unique', 'uniqe'), # self-reference makes everything better :P + ('singular', 'singular'), # test that strings with already-unique characters are unaffected + ] +) +def test_unique_str_ordered(string : str, expected_output : str) -> None: + '''Test that unique characters are correctly identified WITH respect to order''' + assert unique_string(string, preserve_order=True) == expected_output + + +@pytest.mark.parametrize('string, expected_output', + [ + ('aaaaa', 'a'), + ('booboo', 'boo'), + ('piripiri', 'piri'), + ('ababab', 'ab'), + ('bcbabcbabcba', 'bcba'), + # sequences which do not repeat a whole-number of times + ('no repeats', 'no repeats'), + ('ababa', 'ababa'), + ('bonobo', 'bonobo'), + ] +) +def test_shortest_repeating_substring(string : str, expected_output : str) -> None: + '''Test that minimal repeating 
substrings are correctly identified''' + assert shortest_repeating_substring(string) == expected_output + + +@pytest.mark.parametrize('string, target_length, joiner, expected_output', + [ + ('BACA', 10, '', 'BACABACABA'), # expected "standard" use case + ('BACA', 1, '', 'B'), # test case where target length is shorter than the whole string + ('BACA', 0, '', ''), # test that no repeats yields the empty string + ('BACA', 4, '', 'BACA'), # test precisely one repeat without joins + ('BACA', 10, '|', 'BACA|BACA|BA'), # test joiners + ('BACA', 4, '|', 'BACA'), # test no joiners are added when exactly one string repeat occurs + ('BACA', 12, '|', 'BACA|BACA|BACA'), # test no extraneous joiners are included for purely-whole number of repeats + ('CAT', 5, '', 'CATCA'), # test with triads (and different base string) + pytest.param('' , 7, '', None, marks=pytest.mark.xfail(raises=ValueError, reason='Empty string can\'t be repeated into nonempty string', strict=True)), + pytest.param('CAT', 4.2, '', None, marks=pytest.mark.xfail(raises=TypeError , reason='Non-integer string length doesn\'t make sense', strict=True)), + pytest.param('CAT', -1, '', None, marks=pytest.mark.xfail(raises=IndexError, reason='Can\'t have string with fewer than 0 characters', strict=True)), + ] +) +def test_repeat_string_to_length(string : str, target_length : int, joiner : str, expected_output : str) -> None: + '''Test that string repetition to a given length returns the expected string WITH joining characters present''' + assert repeat_string_to_length(string, target_length=target_length, joiner=joiner) == expected_output \ No newline at end of file diff --git a/polymerist/tests/molfiles/__init__.py b/polymerist/tests/molfiles/__init__.py new file mode 100644 index 0000000..d92450c --- /dev/null +++ b/polymerist/tests/molfiles/__init__.py @@ -0,0 +1,4 @@ +'''Unit tests for `molfiles` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' diff --git a/polymerist/tests/molfiles/test_pdb.py b/polymerist/tests/molfiles/test_pdb.py new file mode 100644 index 0000000..9be9f3a --- /dev/null +++ b/polymerist/tests/molfiles/test_pdb.py @@ -0,0 +1,62 @@ +'''Unit tests for PDB file I/O utils''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest +from polymerist.molfiles.pdb import SerialAtomLabeller + + +ELEMS : tuple[str] = ('C', 'H', 'H', 'H', 'N', 'H', 'C', 'O', 'Cl') # atoms for methylcarbamoyl chloride (MCC) + +@pytest.mark.parametrize( + 'mol_atom_elems, atom_label_width, include_elem_idx, default_elem_idx, expected_labels', + [ + (ELEMS, 4, True, 0, ['C000', 'H000', 'H001', 'H002', 'N000', 'H003', 'C001', 'O000', 'Cl00']), # test with default PDB-compatible settings + (ELEMS, 4, True, 2, ['C002', 'H002', 'H003', 'H004', 'N002', 'H005', 'C003', 'O002', 'Cl02']), # test element index offset + (ELEMS, 3, True, 2, ['C02', 'H02', 'H03', 'H04', 'N02', 'H05', 'C03', 'O02', 'Cl2']), # test shorter atom label width + (ELEMS, 1, True, 0, ['C', 'H', 'H', 'H', 'N', 'H', 'C', 'O', 'C']), # test truncation works below threshold where indices can be written + (ELEMS, 4, False, 0, ['C ', 'H ', 'H ', 'H ', 'N ', 'H ', 'C ', 'O ', 'Cl ']), # test without element indices + (ELEMS, 4, False, 7, ['C ', 'H ', 'H ', 'H ', 'N ', 'H ', 'C ', 'O ', 'Cl ']), # test that default indices have no impact when indices aren't present + (ELEMS, 0, False, 0, ['', '', '', '', '', '', '', '', '']), # test null-width labels + # Invalid input handling checks + pytest.param( + ELEMS, -1, True, 0, 
[], # test that negative label width is rejected as intended + marks=pytest.mark.xfail( + raises=ValueError, + reason='Negative atom label widths not allowed', + strict=True, + ) + ), + pytest.param( + ELEMS, 4, True, -5, [], # test that negative default indices are rejected as intended + marks=pytest.mark.xfail( + raises=ValueError, + reason='Negative element indices not allowed', + strict=True, + ) + ), + pytest.param( + tuple(len(elem) for elem in ELEMS), 4, True, 0, [], # test that non-string atom elements are rejected as intended + marks=pytest.mark.xfail( + raises=TypeError, + reason='Must pass atom elements as strings', + strict=True, + ) + ), + ] +) +def test_atom_labeller( + mol_atom_elems : tuple[str], + atom_label_width : int, + include_elem_idx : bool, + default_elem_idx : int, + expected_labels : list[str], + ) -> None: + '''Test that atom labelling behaves as expected with various label formatting configurations''' + labeller = SerialAtomLabeller( + atom_label_width=atom_label_width, + include_elem_idx=include_elem_idx, + default_elem_idx=default_elem_idx, + ) + assert [labeller.get_atom_label(elem) for elem in mol_atom_elems] == expected_labels \ No newline at end of file diff --git a/polymerist/tests/polymers/building/__init__.py b/polymerist/tests/polymers/building/__init__.py new file mode 100644 index 0000000..f4f43b4 --- /dev/null +++ b/polymerist/tests/polymers/building/__init__.py @@ -0,0 +1,4 @@ +'''Unit tests for `building` package''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' \ No newline at end of file diff --git a/polymerist/tests/polymers/building/test_linear.py b/polymerist/tests/polymers/building/test_linear.py new file mode 100644 index 0000000..2c086a9 --- /dev/null +++ b/polymerist/tests/polymers/building/test_linear.py @@ -0,0 +1,135 @@ +'''Tests construction of structures for linear copolymers (and relevant subfamilies, e.g. 
homopolymers)''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +import pytest + +from collections import Counter + +from polymerist.polymers.monomers.repr import MonomerGroup +from polymerist.polymers.monomers.fragments import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS + +from polymerist.polymers.building import build_linear_polymer +from polymerist.polymers.exceptions import MorphologyError, PartialBlockSequence, EmptyBlockSequence + + +@pytest.fixture(scope='function') +def monogrp_polyethylene() -> MonomerGroup: + return MonomerGroup(monomers=PE_FRAGMENTS) + +@pytest.fixture(scope='function') +def monogrp_mpd_tmc() -> MonomerGroup: + return MonomerGroup(monomers=MPD_TMC_FRAGMENTS) + +@pytest.fixture(scope='function') +def monogrp_peg_plga() -> MonomerGroup: + return MonomerGroup(monomers=PEG_PLGA_FRAGMENTS) + + +@pytest.mark.parametrize( + 'monomers, term_orient, n_monomers, sequence, minimize_sequence, allow_partial_sequences, energy_minimize', + [ + # Polyethylene + ('monogrp_polyethylene', {}, 7, 'A', True, True, False), # test end group autogen (should only have 1 term group) + ('monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'A', True, True, False), # test explicit head-tail (result here should be different from autogen structure) + ('monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'A', True, False, False), # test partial sequences (irrelevant here) + ('monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'A', True, False, True), # test energy minimization doesn't crash + pytest.param( # will fail due to the empty sequence provided - + 'monogrp_polyethylene', {}, 7, '', True, True, False, # NOTE: need to have partials enabled, since failure happens ONLY once sequence is passed to mbuild + marks=pytest.mark.xfail( + raises=EmptyBlockSequence, + reason='Sequence provided must be nonempty', + strict=True, + ) + ), + pytest.param( # will fail due to too few monomers for given sequence - + 'monogrp_polyethylene', {'head':'PE1', 'tail' : 'PE1'}, 7, 'AB', True, True, False, # NOTE: need to have partials enabled, since failure happens ONLY once sequence is passed to mbuild + marks=pytest.mark.xfail( + raises=ValueError, + reason='Fewer unique monomers defined than called for by target sequence', + strict=True, + ) + ), + # MPD-TMC + ('monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 8, 'A', True, True, False), # correctly-specified: explicit end groups, only linear middle monomers, and whole number of sequence repeats + pytest.param( + 'monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 7, 'AB', True, False, False, # will fail due to partial sequence + marks=pytest.mark.xfail( + raises=PartialBlockSequence, + reason='Partial sequence repeat needed to get odd number block out of AB, but partial blocks are disabled', + strict=True, + ) + ), + pytest.param( + 'monogrp_mpd_tmc', {'head':'MPD-1', 'tail' : 'TMC-1'}, 12, 'ABC', True, True, False, # will fail due to 3-functional TMC middle monomer as C + marks=pytest.mark.xfail( + raises=MorphologyError, + reason='One of the monomers requested is non-linear (3-functional)', + strict=True, + ) + ), + # PEG-PLGA + ('monogrp_peg_plga', {}, 15, 'ABC', True, True, False), # test autogen + ('monogrp_peg_plga', {}, 17, 'ABC', True, False, False), # test autogen with whole sequence + ('monogrp_peg_plga', {'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, True, False), # test more complex sequence with non-default explicit end groups + pytest.param( + 'monogrp_peg_plga', 
{'head':'PGA-1A', 'tail' : 'PGA-1B'}, 15, 'ABC', True, False, False, # will fail due to partial sequence + marks=pytest.mark.xfail( + raises=PartialBlockSequence, + reason='Partial sequence repeat needed to get a non-whole number of blocks out of ABC, but partial blocks are disabled', + strict=True, + ) + ), + ('monogrp_peg_plga', {}, 40, 'ABCB', True, True, True), # test longer energy min + ] +) +def test_build_linear_polymer( + monomers : MonomerGroup, + term_orient : dict[str, str], + n_monomers : int, + sequence : str, + minimize_sequence : bool, + allow_partial_sequences : bool, + energy_minimize : bool, + request : pytest.FixtureRequest, # allows for fixture expansion in parameterized arguments + ) -> None: + '''Test linear polymer builder behavior under varying sets of parameters''' + monomers = request.getfixturevalue(monomers) # unpack fixtures into their respective values + monomers.term_orient = term_orient # this edit makes it VITAL that fixtures be function-level + + polymer = build_linear_polymer( + monomers=monomers, + n_monomers=n_monomers, + sequence=sequence, + minimize_sequence=minimize_sequence, + allow_partial_sequences=allow_partial_sequences, + add_Hs=False, + energy_minimize=energy_minimize, + ) + + # characterize middle monomers + n_rep_units = len(polymer.children) + residue_sizes : dict[str, int] = {} + residue_counts = Counter() # TODO: make use of this for checks!! + for middle_monomers in polymer.children: + residue_sizes[middle_monomers.name] = middle_monomers.n_particles + residue_counts[middle_monomers.name] += 1 + + # characterize end groups + end_groups_requested = set(resname for head_or_tail, (resname, mol) in monomers.linear_end_groups().items()) + end_groups_used = set() + for end_group in polymer.end_groups: + if end_group is not None: + end_groups_used.add(end_group.name) + residue_sizes[end_group.name] = end_group.n_particles + residue_counts[end_group.name] += 1 + + total_reps_match = (n_rep_units == n_monomers) + contribs_match = all(n_atoms == monomers.contributions()[resname][0] + for resname, n_atoms in residue_sizes.items() + ) + end_groups_correct = (end_groups_used == end_groups_requested) + # counts_match = ... 
+ + assert all([total_reps_match, contribs_match, end_groups_correct]) #, and counts_match ) \ No newline at end of file diff --git a/polymerist/tests/polymers/building/test_sequencing.py b/polymerist/tests/polymers/building/test_sequencing.py new file mode 100644 index 0000000..272fd4d --- /dev/null +++ b/polymerist/tests/polymers/building/test_sequencing.py @@ -0,0 +1,123 @@ +'''Testing that copolymer sequencing scales (and fails) as expected''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from typing import Any +from dataclasses import asdict + +import pytest +from pathlib import Path + +from polymerist.polymers.building.sequencing import LinearCopolymerSequencer as LCS +from polymerist.polymers.exceptions import EmptyBlockSequence, PartialBlockSequence, InsufficientChainLength, EndGroupDominatedChain + + +@pytest.fixture +def sequencer() -> LCS: + '''A sample sequencer with known, valid inputs''' + return LCS(sequence_kernel='ABAB', n_repeat_units=14, n_repeat_units_terminal=2) + +@pytest.mark.parametrize( + 'inputs', + [ + { + 'sequence_kernel' : 'AB', + 'n_repeat_units' : 10, + 'n_repeat_units_terminal' : 1 + }, + pytest.param( + { + 'sequence_kernel' : 'BAC', + 'n_repeat_units' : 1, + 'n_repeat_units_terminal' : 2 + }, + marks=pytest.mark.xfail( + raises=EndGroupDominatedChain, + reason='Results in (unsatisfiable) negative number of middle monomers', + strict=True, + ) + ), + pytest.param( + { + 'sequence_kernel' : '', + 'n_repeat_units' : 7, + 'n_repeat_units_terminal' : 1 + }, + marks=pytest.mark.xfail( + raises=EmptyBlockSequence, + reason='No sequence kernel provided', + strict=True, + ) + ), + ] +) +def test_LCS_input_validation(inputs : dict[str, Any]) -> None: + '''Test that invalid sequencer inputs are correctly rejected''' + _ = LCS(**inputs) # no assert needed, just checking that initialization completes + +def test_LCS_copying(sequencer : LCS) -> None: + '''Test that sequencers are properly copied in a read-only manner''' + sequencer_clone = sequencer.copy() + + # tamper with the parameters of the copy in a way that guarantees distinctness + sequencer_clone.sequence_kernel = 2*sequencer.sequence_kernel + sequencer_clone.n_repeat_units += 2 + sequencer_clone.n_repeat_units_terminal += 1 + + # check that the original WASN'T tampered with + assert asdict(sequencer) != asdict(sequencer_clone) + + +@pytest.mark.parametrize( + 'sequencer, expected_kernel', + [ + (LCS('ABC', n_repeat_units=12), 'ABC'), # test irreducible case + (LCS('ABAB', n_repeat_units=12), 'AB'), # test unreduced case + ] +) +def test_LCS_reduction(sequencer : LCS, expected_kernel : str) -> None: + '''Test that shortest repeating subsequences of sequencer kernels are correctly identified''' + sequencer.reduce() + assert sequencer.sequence_kernel == expected_kernel + +@pytest.mark.parametrize( + 'sequencer, allow_partials, expected_sequence, expected_length', + [ + # tests for homopolymers + (LCS('A', 5, 1), True , 'A', 4), + (LCS('A', 5, 1), False, 'A', 4), # a partial block of a single-monomer sequence can never exist, so the "allow_partial_sequences" setting shouldn't matter + pytest.param( + LCS('A', 1, 1), True, 'A', 1, # test that all-end group (i.e. 
no middle monomer) case is correctly rejected + marks=pytest.mark.xfail( + raises=InsufficientChainLength, + reason='No middle monomers can be accommodated', + strict=True, + ), + ), + # tests for "true" copolymers + (LCS('ABC', 10, 2), True, 'ABCABCAB', 1), + pytest.param( + LCS('ABC', 10, 2), False, 'ABCABCAB', 1, # test that partial-sequence ban correctly blocks partial sequences... + marks=pytest.mark.xfail( + raises=PartialBlockSequence, + reason='Partial sequence repeats have not been allowed', + strict=True, + ), + ), + (LCS('ABC', 11, 2), False, 'ABC', 3), # ...unless the resulting sequence happens to be a whole multiple + pytest.param( + LCS('ABC', 2, 2), True, '', 1, # test that all-end group (i.e. no middle monomer) case is correctly rejected... + marks=pytest.mark.xfail( + raises=InsufficientChainLength, + reason='No middle monomers can be accommodated', + strict=True, + ), + ), + (LCS('ABC', 4, 2), True, 'AB', 1), # ... and finally, check that nonempty sequences SMALLER than the kernel are also recognized if partials are permitted + ] +) +def test_LCS_procrustean_alignment(sequencer : LCS, allow_partials : bool, expected_sequence : str, expected_length : int) -> None: + '''Test capability (and prechecks) for fitting sequence to target chain length''' + seq, n_reps = sequencer.procrustean_alignment(allow_partial_sequences=allow_partials) + assert (seq == expected_sequence) and (n_reps == expected_length) \ No newline at end of file diff --git a/polymerist/tests/polymers/monomers/test_repr.py b/polymerist/tests/polymers/monomers/test_repr.py new file mode 100644 index 0000000..bf93cdf --- /dev/null +++ b/polymerist/tests/polymers/monomers/test_repr.py @@ -0,0 +1,192 @@ +'''Tests that collections of monomer fragments are treated as expected''' + +__author__ = 'Timotej Bernat' +__email__ = 'timotej.bernat@colorado.edu' + +from typing import Any + +import pytest + +from polymerist.polymers.monomers.repr import MonomerGroup +from polymerist.polymers.monomers.fragments import PE_FRAGMENTS, MPD_TMC_FRAGMENTS, PEG_PLGA_FRAGMENTS + + +# Example fragment groups +@pytest.fixture(scope='function') # want to re-initialize for each test function to avoid cross-contamination +def monogrp_degenerate() -> MonomerGroup: + return MonomerGroup(monomers={}) + +@pytest.fixture(scope='function') +def monogrp_polyethylene() -> MonomerGroup: + return MonomerGroup(monomers=PE_FRAGMENTS) + +@pytest.fixture(scope='function') +def monogrp_mpd_tmc() -> MonomerGroup: + return MonomerGroup(monomers=MPD_TMC_FRAGMENTS) + +@pytest.fixture(scope='function') +def monogrp_peg_plga() -> MonomerGroup: + return MonomerGroup(monomers=PEG_PLGA_FRAGMENTS) + + +# Testing all routes to initialization +@pytest.mark.parametrize( + 'monomers', + [ + { # nominal test case + 'PGA-1A': ['[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]'], + 'PGA-1B': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]'], + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], + }, + { # test that list closure autofill works + 'PGA-1A': '[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]', + 'PGA-1B': '[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[#8D2+0:6]-[#1D1+0:9])(-[#1D1+0:7])-[#1D1+0:8]', + 'PGA-2': '[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]', + }, + # XFAILS: test that the initializer rejects... 
+ pytest.param( + { # ...1) non-string like objects + 'foo' : 42.0, + 'bar' : True, + }, + marks=pytest.mark.xfail( + raises=TypeError, + reason='Monomer fragment inputs are not stringlike', + strict=True, + ), + ), + pytest.param( + { # 1a) more subtly, lists OF CONTAINERS of valid SMARTS are still invalid + 'PGA-1A': [( + '[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]', + '[#8D2+0:1](-[#6D4+0:2](-[#6D3+0:3](=[#8D1+0:4])-[*:5])(-[#1D1+0:7])-[#1D1+0:8])-[#1D1+0:6]' + )], + 'PGA-2': ['[*:1]-[#8D2+0:2]-[#6D4+0:3](-[#6D3+0:4](=[#8D1+0:5])-[*:6])(-[#1D1+0:7])-[#1D1+0:8]'], + }, + marks=pytest.mark.xfail( + raises=TypeError, + reason='Monomer fragment inputs are not stringlike', + strict=True, + ), + ), + pytest.param( + { # ...2) non-empty strings which are nevertheless invalid SMARTS + #- NOTE: empty strings, perhaps surprisingly, actually ARE valid as SMARTS and therefore aren't xfail tested here + 'fake-1': ['this is a bogus SMARTS'], + 'invalid-2': ['so_is_this'], + }, + marks=pytest.mark.xfail( + raises=ValueError, + reason='At least one monomer fragment input is not a valid SMARTS string', + strict=True, + ), + ), + pytest.param( + { # ...2a) this one is very subtle, but SMARTS with slight errors which invalidate them as SMARTS should also fail + 'PGA-1A': ['[OH]CD(=O)*'], # fat-finger mistake, "D" should be "C" + 'PGA-2': ['*OCC(+O)*'], # forgot to hit shift when typing double bond + }, + marks=pytest.mark.xfail( + raises=ValueError, + reason='At least one monomer fragment input is not a valid SMARTS string', + strict=True, + ), + ), + ] +) +def test_monogrp_init(monomers : dict[str, Any]) -> None: + '''Check that the MonomerGroup initializer handles valid inputs as expected and rejects invalid inputs''' + _ = MonomerGroup(monomers=monomers) # no assert needed, just checking that initialization completes + +# Testing properties of contained monomers +@pytest.mark.parametrize( + 'monogrp, expected_is_linear', + [ + ('monogrp_peg_plga', True), + ('monogrp_mpd_tmc', False), + ], +) +def test_monogrp_linearity(monogrp : MonomerGroup, expected_is_linear : bool, request : pytest.FixtureRequest) -> None: + '''Test whether branched and unbranched chain fragment detection behaves as expected''' + monogrp = request.getfixturevalue(monogrp) # unpack fixtures into their respective values + assert monogrp.is_linear == expected_is_linear + +@pytest.mark.parametrize( + 'monogrp, expected_counts', + [ + ('monogrp_peg_plga', (3, 6)), + ('monogrp_mpd_tmc', (3, 2)), + ], +) +def test_monogrp_mid_and_term_counts(monogrp : MonomerGroup, expected_counts : tuple[int, int], request : pytest.FixtureRequest) -> None: + '''Test whether middle and terminal monomers are counted correctly''' + monogrp = request.getfixturevalue(monogrp) # unpack fixtures into their respective values + assert monogrp.num_mid_and_term == expected_counts + +# Testing end group determination +@pytest.mark.parametrize( + 'monogrp, term_orient, expected_end_groups', + [ + # 1) test autogeneration of orientations when... + ( # ...term orientation is unspecified but can be completed for both ends (i.e. at least 2 terminal monomers are available) + 'monogrp_peg_plga', + {}, + {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}, + ), + ( # ...term orientation is unspecified and can only be partially completed (i.e. 
fewer than 2 terminal monomers are available) + 'monogrp_polyethylene', + {}, + {'head' : 'PE1'}, + ), + ( # ...term orientation is unspecified and no end monomers are available for auto-assignment + 'monogrp_degenerate', + {}, + {}, + ), + ( # ...term orientation is unspecified but can be completed for both ends (i.e. at least 2 terminal monomers are available) + 'monogrp_peg_plga', + {}, + {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}, + ), + # 2) test end group identification for correctly-specified term orientation + ( # test nominal case + 'monogrp_peg_plga', + {'head' : 'PGA-1A', 'tail' : 'PEG-1B'}, + {'head' : 'PGA-1A', 'tail' : 'PEG-1B'}, + ), + ( # test that duplication works as expected + 'monogrp_polyethylene', + {'head' : 'PE1', 'tail' : 'PE1'}, + {'head' : 'PE1', 'tail' : 'PE1'}, + ), + # 3) test incorrect specifications + ( # specification without "head"/"tail" keys will not fail, but WILL default to auto-gen + 'monogrp_peg_plga', + {'first' : 'PGA-1A', 'second' : 'PEG-1B'}, + {'head' : 'PEG-1A', 'tail' : 'PEG-1B'}, + ), + pytest.param( # specification with invalid monomer names (i.e. keys not in the "monomers" dict) should raise an outright error + 'monogrp_peg_plga', + {'head' : 'PGG-2C', 'tail' : 'BOGUS'}, + None, + marks=pytest.mark.xfail( + raises=KeyError, + reason='Term group names specified don\'t exist within the monomer fragments defined', + strict=True, + ) + ), + ], +) +def test_monogrp_end_groups(monogrp : MonomerGroup, term_orient : dict[str, str], expected_end_groups : dict[str, str], request : pytest.FixtureRequest) -> None: + '''Test that procedural end group determination behaves as expected''' + monogrp = request.getfixturevalue(monogrp) # unpack fixtures into their respective values + monogrp.term_orient = term_orient + + end_group_catalogue = monogrp.linear_end_groups() + end_group_names = { + head_or_tail : resname # drop RDKit Mol for check (Mol object is harder to validate, use bound name as proxy) + for head_or_tail, (resname, _) in end_group_catalogue.items() + } + + assert end_group_names == expected_end_groups + \ No newline at end of file
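
Usage sketch for the reworked MonomerGroup API above (illustrative only; it mirrors the fixtures and assertions in test_repr.py, assumes polymerist and its RDKit dependency are installed, and the variable name grp is a placeholder):

from polymerist.polymers.monomers.repr import MonomerGroup
from polymerist.polymers.monomers.fragments import PEG_PLGA_FRAGMENTS

# __post_init__ wraps bare SMARTS into lists and validates each entry via is_valid_SMARTS
grp = MonomerGroup(monomers=PEG_PLGA_FRAGMENTS)
grp.term_orient = {'head' : 'PGA-1A', 'tail' : 'PEG-1B'}  # residue names must exist in grp.monomers

# end groups come back as {'head'/'tail' : (residue name, RDKit Mol)} pairs
for head_or_tail, (resname, rdmol) in grp.linear_end_groups().items():
    print(head_or_tail, resname, rdmol.GetNumAtoms())

# per-residue counts of real (i.e. non-linker) atoms, as used by the builder tests
print(grp.contributions())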
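A minimal sketch of the SerialAtomLabeller behavior pinned down in test_pdb.py; the expected labels are copied from the first parametrized case (default PDB-compatible settings):

from polymerist.molfiles.pdb import SerialAtomLabeller

# elements of methylcarbamoyl chloride, labelled with width-4, per-element serial indices
labeller = SerialAtomLabeller(atom_label_width=4, include_elem_idx=True, default_elem_idx=0)
labels = [labeller.get_atom_label(elem) for elem in ('C', 'H', 'H', 'H', 'N', 'H', 'C', 'O', 'Cl')]
assert labels == ['C000', 'H000', 'H001', 'H002', 'N000', 'H003', 'C001', 'O000', 'Cl00']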
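Usage sketch for build_linear_polymer, mirroring the passing MPD-TMC case in test_linear.py (illustrative; it assumes the optional structure-building dependencies referenced by the tests, e.g. mbuild, are installed):

from polymerist.polymers.building import build_linear_polymer
from polymerist.polymers.monomers.repr import MonomerGroup
from polymerist.polymers.monomers.fragments import MPD_TMC_FRAGMENTS

monomers = MonomerGroup(monomers=MPD_TMC_FRAGMENTS)
monomers.term_orient = {'head' : 'MPD-1', 'tail' : 'TMC-1'}  # explicit, linear-compatible end groups

# 8 total repeat units from sequence 'A' (a whole number of repeats), as in the correctly-specified test case
polymer = build_linear_polymer(
    monomers=monomers,
    n_monomers=8,
    sequence='A',
    minimize_sequence=True,
    allow_partial_sequences=True,
    add_Hs=False,
    energy_minimize=False,
)
print(len(polymer.children), [end.name for end in polymer.end_groups if end is not None])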
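Worked example of the chain-length arithmetic that test_sequencing.py exercises for LinearCopolymerSequencer.procrustean_alignment (expected values are taken directly from the parametrized cases above):

from polymerist.polymers.building.sequencing import LinearCopolymerSequencer

# 10 total repeat units, 2 reserved for end groups -> 8 middle units to cover with 'ABC' blocks
seq = LinearCopolymerSequencer(sequence_kernel='ABC', n_repeat_units=10, n_repeat_units_terminal=2)
sequence, n_reps = seq.procrustean_alignment(allow_partial_sequences=True)
assert (sequence, n_reps) == ('ABCABCAB', 1)  # padded partial block, repeated once

# with partials disallowed, the middle units must divide into whole kernel repeats: 11 - 2 = 9 = 3 x 'ABC'
seq = LinearCopolymerSequencer(sequence_kernel='ABC', n_repeat_units=11, n_repeat_units_terminal=2)
sequence, n_reps = seq.procrustean_alignment(allow_partial_sequences=False)
assert (sequence, n_reps) == ('ABC', 3)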