diff --git a/corona.py b/corona.py
index 9110aab..849290e 100644
--- a/corona.py
+++ b/corona.py
@@ -1,4 +1,4 @@
-from lib import cc, translate
+from lib import genome, translate
 # entire diff: https://www.ncbi.nlm.nih.gov/projects/msaviewer/?rid=7FYNU14F01R&coloring=
 # protein alignments: http://virological.org/t/alignment-of-58-sarbecovirus-genomes-for-conservation-analysis-of-sars-cov-2/430
 
@@ -44,15 +44,17 @@
 #        https://en.wikipedia.org/wiki/MRNA_(nucleoside-2%27-O-)-methyltransferase
 
 # in front "the untranslated leader sequence that ends with the Transcription Regulation Sequence"
-corona['untranslated_region'] = cc[0:265]
+corona['untranslated_region'] = genome.get_nucleotideSequence()[0:265]
 
-corona['orf1a'] = translate(cc[266-1:13483], True)
+corona['orf1a'] = translate(genome.get_nucleotideSequence()[266-1:13483], True)
 
 # cc[266-1+4398*3:13468] = 'TTT_TTA_AAC' aka 'X_XXY_YYZ'
 # https://en.wikipedia.org/wiki/Ribosomal_frameshift
 # Programmed −1 Ribosomal Frameshifting
 # TODO: add this to the translate function with automatic detection
-corona['orf1b'] = translate(cc[13468-1:21555], False).strip("*")  # chop off the stop, note this doesn't have a start
+# chop off the stop, note this doesn't have a start
+corona['orf1b'] = translate(genome.get_nucleotideSequence()[
+                            13468-1:21555], False).strip("*")
 
 # exploit vector, this attaches to ACE2. also called "surface glycoprotein"
 # https://www.ncbi.nlm.nih.gov/Structure/pdb/6VYB -- open state
@@ -62,28 +64,37 @@
 #   S2  = 686-1273
 #   S2' = 816-1273
 # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2750777/
-corona['spike_glycoprotein'] = translate(cc[21563-1:25384], True)
+corona['spike_glycoprotein'] = translate(
+    genome.get_nucleotideSequence()[21563-1:25384], True)
 
 # Forms homotetrameric potassium sensitive ion channels (viroporin) and may modulate virus release.
-corona['orf3a'] = translate(cc[25393-1:26220], True)
+corona['orf3a'] = translate(
+    genome.get_nucleotideSequence()[25393-1:26220], True)
 
 # these two things stick out, used in assembly aka they package the virus
-corona['envelope_protein'] = translate(cc[26245-1:26472], True)  # also known as small membrane
-corona['membrane_glycoprotein'] = translate(cc[26523-1:27191], True)
+corona['envelope_protein'] = translate(
+    genome.get_nucleotideSequence()[26245-1:26472], True)  # also known as small membrane
+corona['membrane_glycoprotein'] = translate(
+    genome.get_nucleotideSequence()[26523-1:27191], True)
 
-corona['orf6'] = translate(cc[27202-1:27387], True)
+corona['orf6'] = translate(
+    genome.get_nucleotideSequence()[27202-1:27387], True)
 
-corona['orf7a'] = translate(cc[27394-1:27759], True)
-corona['orf7b'] = translate(cc[27756-1:27887], True)  # is this one real?
+corona['orf7a'] = translate(
+    genome.get_nucleotideSequence()[27394-1:27759], True)
+corona['orf7b'] = translate(genome.get_nucleotideSequence()[
+                            27756-1:27887], True)  # is this one real?
 
-corona['orf8'] = translate(cc[27894-1:28259], True)
+corona['orf8'] = translate(
+    genome.get_nucleotideSequence()[27894-1:28259], True)
 
 # https://en.wikipedia.org/wiki/Capsid
 # Packages the positive strand viral genome RNA into a helical ribonucleocapsid
 # Includes the "internal" protein (from Coronavirus Pathogenesis)
 # https://www.sciencedirect.com/topics/veterinary-science-and-veterinary-medicine/human-coronavirus-oc43
-corona['nucleocapsid_phosphoprotein'] = translate(cc[28274-1:29533], True)
+corona['nucleocapsid_phosphoprotein'] = translate(
+    genome.get_nucleotideSequence()[28274-1:29533], True)
 
 # might be called the internal protein (Coronavirus Pathogenesis)
-corona['orf10'] = translate(cc[29558-1:29674], True)
-
+corona['orf10'] = translate(
+    genome.get_nucleotideSequence()[29558-1:29674], True)
diff --git a/fold.py b/fold.py
index 50b2e22..c3ea741 100755
--- a/fold.py
+++ b/fold.py
@@ -10,7 +10,8 @@
 parser = argparse.ArgumentParser(description='Fold some proteins.')
 parser.add_argument('--scratch', action='store_true')
 parser.add_argument('--temp', type=int, default=300)
-parser.add_argument('--steps', type=int, default=100000, help="2500000000 should fold the protein")
+parser.add_argument('--steps', type=int, default=100000,
+                    help="2500000000 should fold the protein")
 parser.add_argument('--writes', type=int, default=1000, help="default is 1000")
 parser.add_argument('--out', type=str, default="/tmp/output.pdb")
 parser.add_argument('--pdb', type=str, default="proteins/villin/1vii.pdb")
@@ -18,24 +19,24 @@
 args = parser.parse_args(sys.argv[1:])
 
 try:
-  platform = Platform.getPlatformByName("CUDA")
+    platform = Platform.getPlatformByName("CUDA")
 except Exception:
-  platform = Platform.getPlatformByName("OpenCL")
+    platform = Platform.getPlatformByName("OpenCL")
 
 if args.scratch:
-  # unfolded protein
-  if args.fasta is not None:
-    fasta = args.fasta
-  else:
-    protein_fasta = "proteins/villin/1vii.fasta"
-    fasta = open(protein_fasta).read().split("\n")[1]
-  print("folding %s" % fasta)
-  from lib import write_unfolded
-  write_unfolded(fasta, "/tmp/unfolded.pdb")
-  pdb = PDBFile("/tmp/unfolded.pdb")
+    # unfolded protein
+    if args.fasta is not None:
+        fasta = args.fasta
+    else:
+        protein_fasta = "proteins/villin/1vii.fasta"
+        fasta = open(protein_fasta).read().split("\n")[1]
+    print("folding %s" % fasta)
+    from lib import write_unfolded
+    write_unfolded(fasta, "/tmp/unfolded.pdb")
+    pdb = PDBFile("/tmp/unfolded.pdb")
 else:
-  # already folded protein
-  pdb = PDBFile(args.pdb)
+    # already folded protein
+    pdb = PDBFile(args.pdb)
 
 #forcefield = ForceField('amber99sb.xml', 'tip3p.xml')
 forcefield = ForceField('amber03.xml', 'amber03_obc.xml')
@@ -45,9 +46,10 @@
 print(modeller.topology)
 
 system = forcefield.createSystem(modeller.topology,
-  implicitSolvent=OBC2,   # matches https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2980750/#bib39
-  nonbondedMethod=NoCutoff, nonbondedCutoff=1*nanometer,
-  constraints=HBonds)
+                                 # matches https://www.ncbi.nlm.nih.gov/pmc/articles/PMC2980750/#bib39
+                                 implicitSolvent=OBC2,
+                                 nonbondedMethod=NoCutoff, nonbondedCutoff=1*nanometer,
+                                 constraints=HBonds)
 integrator = LangevinIntegrator(args.temp*kelvin, 1/picosecond, 2*femtoseconds)
 simulation = Simulation(modeller.topology, system, integrator, platform)
 simulation.context.setPositions(modeller.positions)
@@ -57,6 +59,6 @@
 steps_write = max(1, steps//args.writes)
 print("writing every %d steps" % steps_write)
 simulation.reporters.append(PDBReporter(args.out, steps_write))
-simulation.reporters.append(StateDataReporter(stdout, steps_write, step=True, potentialEnergy=True, temperature=True))
+simulation.reporters.append(StateDataReporter(
+    stdout, steps_write, step=True, potentialEnergy=True, temperature=True))
 simulation.step(steps)
-
diff --git a/generic-genome.py b/generic-genome.py
new file mode 100644
index 0000000..1f8a530
--- /dev/null
+++ b/generic-genome.py
@@ -0,0 +1,20 @@
+class Genome:
+    def __init__(self, chromosomes, genes, dnscode):
+        self.chromosomes = chromosomes
+
+    def get_nucleotideSequence(self):
+        return self.chromosomes
+
+
+class GenomeBuilder():
+    def add_chromosomes(self, chromosomes):
+        self.chromosomes = chromosomes
+
+    def add_genes(self, genes):
+        self.genes = genes
+
+    def add_dnscode(self, dnscode):
+        self.dnscode = dnscode
+
+    def build(self):
+        return Genome(self.chromosomes, self.genes, self.dnscode)
diff --git a/genome.py b/genome.py
new file mode 100644
index 0000000..896e087
--- /dev/null
+++ b/genome.py
@@ -0,0 +1,21 @@
+class Genome:
+    def __init__(self, nucleotideSequence):
+        self.nucleotideSequence = nucleotideSequence
+
+    def get_nucleotideSequence(self):
+        return self.nucleotideSequence
+
+
+class GenomeBuilder():
+    def __init__(self, nucleotideSequence):
+        self.genome = Genome(nucleotideSequence)
+
+    def build(self):
+        if(self.validate()):
+            return self.genome
+        else:
+            return None
+
+    def validate(self):
+        my_set = set('ACGTU')
+        return set(self.genome.get_nucleotideSequence).issubset(my_set)
diff --git a/lib.py b/lib.py
index f440f4c..eec4267 100644
--- a/lib.py
+++ b/lib.py
@@ -1,5 +1,9 @@
+import pathlib
+import json
+import os
 import random
-
+from genome import Genome
+from genome import GenomeBuilder
 # Asn or Asp / B  AAU, AAC; GAU, GAC
 # Gln or Glu / Z  CAA, CAG; GAA, GAG
 # START AUG
@@ -27,87 +31,95 @@
 """.strip()
 dec = {}
 for t in tt.split("\n"):
-  k = t[:len("Val / V")].strip()
-  v = t[len("Val / V "):]
-  if '/' in k:
-    k = k.split("/")[-1].strip()
-  k = k.replace("STOP", "*")
-  v = v.replace(",", "").replace(";", "").lower().replace("u", "t").split(" ")
-  for vv in v:
-    if vv in dec:
-      print("dup", vv)
-    dec[vv.strip()] = k
+    k = t[:len("Val / V")].strip()
+    v = t[len("Val / V "):]
+    if '/' in k:
+        k = k.split("/")[-1].strip()
+    k = k.replace("STOP", "*")
+    v = v.replace(",", "").replace(
+        ";", "").lower().replace("u", "t").split(" ")
+    for vv in v:
+        if vv in dec:
+            print("dup", vv)
+        dec[vv.strip()] = k
+
 
 def translate(x, protein=False):
-  x = x.lower()
-  aa = []
-  for i in range(0, len(x)-2, 3):
-    aa.append(dec[x[i:i+3]])
-  aa = ''.join(aa)
-  if protein:
-    if aa[0] != "M" or aa[-1] != "*":
-      print("BAD PROTEIN")
-      print(aa)
-      return None
-    aa = aa[:-1]
-  return aa
+    x = x.lower()
+    aa = []
+    for i in range(0, len(x)-2, 3):
+        aa.append(dec[x[i:i+3]])
+    aa = ''.join(aa)
+    if protein:
+        if aa[0] != "M" or aa[-1] != "*":
+            print("BAD PROTEIN")
+            print(aa)
+            return None
+        aa = aa[:-1]
+    return aa
+
 
 ltl = 'Asp D Glu E Arg R Lys K His H Asn N Gln Q Ser S Thr T Tyr Y Ala A Gly G Val V Leu L Ile I Pro P Phe F Met M Trp W Cys C'
 ltl = ltl.split(" ")
 ltl = dict(zip(ltl[1::2], ltl[0::2]))
 
+
 def get_atoms():
-  from data import get_amber99sb
-  amber99sb = get_amber99sb()
-  residues = amber99sb.getElementsByTagName("Residue")
-  atoms = {}
-  for r in residues:
-    name = r.attributes['name'].value
-    atoms[name] = [x.attributes['name'].value for x in r.getElementsByTagName("Atom")]
-  return atoms
+    from data import get_amber99sb
+    amber99sb = get_amber99sb()
+    residues = amber99sb.getElementsByTagName("Residue")
+    atoms = {}
+    for r in residues:
+        name = r.attributes['name'].value
+        atoms[name] = [
+            x.attributes['name'].value for x in r.getElementsByTagName("Atom")]
+    return atoms
+
 
 def write_unfolded(fasta, fn):
-  atoms = get_atoms()
-  atom_num = 1
-  res_num = 1
-  ss = []
-  random.seed(1337)
-  for i, aa in enumerate(fasta):
-    tl = ltl[aa].upper()
-    for a in atoms[tl] + ([] if i != len(fasta)-1 else ["OXT"]):
-      if len(a) < 4:
-        pa = " " + a
-      else:
-        pa = a
-      gr = lambda: 1.0*(random.random()-0.5)
-      x,y,z = gr(), gr(), gr()
-      x += res_num*5
-      s = "ATOM %6d %-4s %3s A %3d    %8.3f%8.3f%8.3f  1.00  1.00           %s" % \
-        (atom_num, pa, tl, res_num, x, y, z, a[0:1])
-      ss.append(s)
-      atom_num += 1
-    res_num += 1
+    atoms = get_atoms()
+    atom_num = 1
+    res_num = 1
+    ss = []
+    random.seed(1337)
+    for i, aa in enumerate(fasta):
+        tl = ltl[aa].upper()
+        for a in atoms[tl] + ([] if i != len(fasta)-1 else ["OXT"]):
+            if len(a) < 4:
+                pa = " " + a
+            else:
+                pa = a
+
+            def gr(): return 1.0*(random.random()-0.5)
+            x, y, z = gr(), gr(), gr()
+            x += res_num*5
+            s = "ATOM %6d %-4s %3s A %3d    %8.3f%8.3f%8.3f  1.00  1.00           %s" % \
+                (atom_num, pa, tl, res_num, x, y, z, a[0:1])
+            ss.append(s)
+            atom_num += 1
+        res_num += 1
+
+    with open(fn, "w") as f:
+        f.write('\n'.join(ss))
+
 
-  with open(fn, "w") as f:
-    f.write('\n'.join(ss))
-  
 def invert(dd):
-  dd = dd.upper()
-  def _invert(x):
-    if x == 'A':
-      return 'T'
-    elif x == 'T':
-      return 'A'
-    elif x == 'C':
-      return 'G'
-    elif x == 'G':
-      return 'C'
-  return (''.join([_invert(x) for x in dd]))[::-1]
+    dd = dd.upper()
 
-import pathlib
-import os
-import json
-with open(os.path.join(pathlib.Path(__file__).parent.absolute(), "data", "allseq.json")) as f:
-  allseq = json.load(f)
-cc = allseq['MN908947']
+    def _invert(x):
+        if x == 'A':
+            return 'T'
+        elif x == 'T':
+            return 'A'
+        elif x == 'C':
+            return 'G'
+        elif x == 'G':
+            return 'C'
+    return (''.join([_invert(x) for x in dd]))[::-1]
 
+
+with open(os.path.join(pathlib.Path(__file__).parent.absolute(), "data", "allseq.json")) as f:
+    allseq = json.load(f)
+nucleotideSequence = allseq['MN908947']
+builder = GenomeBuilder(nucleotideSequence)
+genome = builder.build()
diff --git a/opt.py b/opt.py
index c909e4d..4f9fc9c 100755
--- a/opt.py
+++ b/opt.py
@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
-from lib import cc as virus
+from lib import genome as virus
 from vaccine.load import dat as vaccine
 from corona import corona
 
-virus = virus.replace("T", "U")
+virus = virus.get_nucleotideSequence().replace("T", "U")
 vaccine = vaccine.replace("Ψ", "U")
 
 """
@@ -21,4 +21,3 @@
 
 print(vvirus)
 print(vaccine)
-