diff --git a/mag_annotator/CONFIG b/mag_annotator/CONFIG
index 45f3b20a..0f019614 100644
--- a/mag_annotator/CONFIG
+++ b/mag_annotator/CONFIG
@@ -1,28 +1,155 @@
 {
   "search_databases": {
-    "kegg": null,
-    "kofam_hmm": null,
-    "kofam_ko_list": null,
-    "uniref": null,
-    "pfam": null,
-    "dbcan": null,
-    "viral": null,
-    "peptidase": null,
-    "vogdb": null
+    "kegg": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/kegg.20221012.mmsdb",
+    "kofam_hmm": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/kofam_profiles.hmm",
+    "kofam_ko_list": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/kofam_ko_list.tsv",
+    "uniref": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/uniref90.20220928.mmsdb",
+    "pfam": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/pfam.mmspro",
+    "dbcan": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/dbCAN-HMMdb-V11.txt",
+    "viral": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/refseq_viral.20220928.mmsdb",
+    "peptidase": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/peptidases.20220928.mmsdb",
+    "vogdb": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/vog_latest_hmms.txt"
   },
-  "custom_dbs": null,
   "database_descriptions": {
-    "pfam_hmm_dat": null,
-    "dbcan_fam_activities": null,
-    "vog_annotations": null
+    "pfam_hmm": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/Pfam-A.hmm.dat.gz",
+    "dbcan_fam_activities": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/CAZyDB.08062022.fam-activities.txt",
+    "dbcan_subfam_ec": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/CAZyDB.08062022.fam.subfam.ec.txt",
+    "vog_annotations": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/vog_annotations_latest.tsv.gz"
   },
   "dram_sheets": {
-    "genome_summary_form": null,
-    "module_step_form": null,
-    "etc_module_database": null,
-    "function_heatmap_form": null,
-    "amg_database": null
+    "genome_summary_form": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/genome_summary_form.20220928.tsv",
+    "module_step_form": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/module_step_form.20220928.tsv",
+    "etc_module_database": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/etc_mdoule_database.20220928.tsv",
+    "function_heatmap_form": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/function_heatmap_form.20220928.tsv",
+    "amg_database": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/amg_database.20220928.tsv"
   },
-  "description_db": null,
-  "dram_version": null
-}
\ No newline at end of file
+  "dram_version": "1.4.0rc1",
+  "description_db": "/home/projects-wrighton-2/DRAM/development_flynn/public_DRAM/sep_12_22_dram1.4_rc_setup_test/testoutput/DRAM1_4_pycallgraph_3/description_db.sqlite",
+  "setup_info": {
+    "kegg": {
+      "name": "KEGG db",
+      "description_db_updated": "10/12/2022, 18:52:36",
+      "citation": " M. Kanehisa, M. Furumichi, Y. Sato, M. Ishiguro-Watanabe, and M. Tanabe, \"Kegg: integrating viruses and cellular organisms,\" Nucleic acids research, vol. 49, no. D1, pp. D545\u2013D551, 2021."
+    },
+    "kofam_hmm": {
+      "name": "KOfam db",
+      "citation": "T. Aramaki, R. Blanc-Mathieu, H. Endo, K. Ohkubo, M. Kanehisa, S. Goto, and H. Ogata, \"Kofamkoala: Kegg ortholog assignment based on profile hmm and adaptive score threshold,\" Bioinformatics, vol. 36, no. 7, pp. 2251\u20132252, 2020.",
+      "Download time": "09/28/2022, 11:00:09",
+      "Origin": "Downloaded by DRAM"
+    },
+    "kofam_ko_list": {
+      "name": "KOfam KO list",
+      "citation": "T. Aramaki, R. Blanc-Mathieu, H. Endo, K. Ohkubo, M. Kanehisa, S. Goto, and H. Ogata, \"Kofamkoala: Kegg ortholog assignment based on profile hmm and adaptive score threshold,\" Bioinformatics, vol. 36, no. 7, pp. 2251\u20132252, 2020.",
+      "Download time": "09/28/2022, 11:00:11",
+      "Origin": "Downloaded by DRAM"
+    },
+    "uniref": {
+      "name": "UniRef db",
+      "description_db_updated": "09/29/2022, 13:14:40",
+      "citation": "Y. Wang, Q. Wang, H. Huang, W. Huang, Y. Chen, P. B. McGarvey, C. H. Wu, C. N. Arighi, and U. Consortium, \"A crowdsourcing open platform for literature curation in uniprot,\" PLoS Biology, vol. 19, no. 12, p. e3001464, 2021.",
+      "version": "90",
+      "Download time": "09/28/2022, 11:15:01",
+      "Origin": "Downloaded by DRAM"
+    },
+    "pfam": {
+      "name": "Pfam db",
+      "citation": "J. Mistry, S. Chuguransky, L. Williams, M. Qureshi, G. A. Salazar, E. L. Sonnhammer, S. C. Tosatto, L. Paladin, S. Raj, L. J. Richardson et al., \"Pfam: The protein families database in 2021,\" Nucleic acids research, vol. 49, no. D1, pp. D412\u2013D419, 2021.",
+      "Download time": "09/28/2022, 11:49:29",
+      "Origin": "Downloaded by DRAM",
+      "description_db_updated": "09/29/2022, 13:23:47"
+    },
+    "pfam_hmm": {
+      "name": "Pfam hmm dat",
+      "description_db_updated": "Unknown, or Never",
+      "citation": "J. Mistry, S. Chuguransky, L. Williams, M. Qureshi, G. A. Salazar, E. L. Sonnhammer, S. C. Tosatto, L. Paladin, S. Raj, L. J. Richardson et al., \"Pfam: The protein families database in 2021,\" Nucleic acids research, vol. 49, no. D1, pp. D412\u2013D419, 2021.",
+      "Download time": "09/28/2022, 11:49:31",
+      "Origin": "Downloaded by DRAM"
+    },
+    "dbcan": {
+      "name": "dbCAN db",
+      "citation": "Y. Yin, X. Mao, J. Yang, X. Chen, F. Mao, and Y. Xu, \"dbcan: a web resource for automated carbohydrate-active enzyme annotation,\" Nucleic acids research, vol. 40, no. W1, pp. W445\u2013W451, 2012.",
+      "version": "11",
+      "Download time": "09/28/2022, 11:49:33",
+      "Origin": "Downloaded by DRAM",
+      "description_db_updated": "09/29/2022, 13:23:50"
+    },
+    "dbcan_fam_activities": {
+      "name": "dbCAN family activities",
+      "citation": "Y. Yin, X. Mao, J. Yang, X. Chen, F. Mao, and Y. Xu, \"dbcan: a web resource for automated carbohydrate-active enzyme annotation,\" Nucleic acids research, vol. 40, no. W1, pp. W445\u2013W451, 2012.",
+      "version": "11",
+      "upload_date": "08062022",
+      "Download time": "09/28/2022, 11:49:33",
+      "Origin": "Downloaded by DRAM"
+    },
+    "dbcan_subfam_ec": {
+      "name": "dbCAN subfamily EC numbers",
+      "citation": "Y. Yin, X. Mao, J. Yang, X. Chen, F. Mao, and Y. Xu, \"dbcan: a web resource for automated carbohydrate-active enzyme annotation,\" Nucleic acids research, vol. 40, no. W1, pp. W445\u2013W451, 2012.",
+      "version": "11",
+      "upload_date": "08062022",
+      "Download time": "09/28/2022, 11:49:33",
+      "Origin": "Downloaded by DRAM"
+    },
+    "vogdb": {
+      "name": "VOGDB db",
+      "citation": "J. Thannesberger, H.-J. Hellinger, I. Klymiuk, M.-T. Kastner, F. J. Rieder, M. Schneider, S. Fister, T. Lion, K. Kosulin, J. Laengle et al., \"Viruses comprise an extensive pool of mobile genetic elements in eukaryote cell cultures and human clinical samples,\" The FASEB Journal, vol. 31, no. 5, pp. 1987\u20132000, 2017.",
+      "version": "latest",
+      "Download time": "09/28/2022, 11:51:57",
+      "Origin": "Downloaded by DRAM",
+      "description_db_updated": "09/29/2022, 13:24:14"
+    },
+    "vog_annotations": {
+      "name": "VOG annotations",
+      "description_db_updated": "Unknown, or Never",
+      "citation": "J. Thannesberger, H.-J. Hellinger, I. Klymiuk, M.-T. Kastner, F. J. Rieder, M. Schneider, S. Fister, T. Lion, K. Kosulin, J. Laengle et al., \"Viruses comprise an extensive pool of mobile genetic elements in eukaryote cell cultures and human clinical samples,\" The FASEB Journal, vol. 31, no. 5, pp. 1987\u20132000, 2017.",
+      "version": "latest",
+      "Download time": "09/28/2022, 11:51:58",
+      "Origin": "Downloaded by DRAM"
+    },
+    "viral": {
+      "name": "RefSeq Viral db",
+      "description_db_updated": "09/29/2022, 13:16:15",
+      "citation": "J. R. Brister, D. Ako-Adjei, Y. Bao, and O. Blinkova, \"Ncbi viral genomes resource,\" Nucleic acids research, vol. 43, no. D1, pp. D571\u2013D577, 2015. [3] M. Kanehisa, M. Furumichi, Y. Sato, M. Ishiguro-Watanabe, and M. Tan-abe, \"Kegg: integrating viruses and cellular organisms,\" Nucleic acids research, vol. 49, no. D1, pp. D545\u2013D551, 2021.",
+      "viral_files": 2,
+      "Download time": "09/28/2022, 11:52:20",
+      "Origin": "Downloaded by DRAM"
+    },
+    "peptidase": {
+      "name": "MEROPS peptidase db",
+      "description_db_updated": "09/29/2022, 13:23:40",
+      "citation": "N. D. Rawlings, A. J. Barrett, P. D. Thomas, X. Huang, A. Bateman, and R. D. Finn, \"The merops database of proteolytic enzymes, their substrates and inhibitors in 2017 and a comparison with peptidases in the panther database,\" Nucleic acids research, vol. 46, no. D1, pp. D624\u2013D632, 2018.",
+      "Download time": "09/28/2022, 12:01:46",
+      "Origin": "Downloaded by DRAM"
+    },
+    "genome_summary_form": {
+      "name": "Genome summary form",
+      "branch": "master",
+      "Download time": "09/28/2022, 12:01:46",
+      "Origin": "Downloaded by DRAM"
+    },
+    "module_step_form": {
+      "name": "Module step form",
+      "branch": "master",
+      "Download time": "09/28/2022, 12:01:47",
+      "Origin": "Downloaded by DRAM"
+    },
+    "function_heatmap_form": {
+      "name": "Function heatmap form",
+      "branch": "master",
+      "Download time": "09/28/2022, 12:01:47",
+      "Origin": "Downloaded by DRAM"
+    },
+    "amg_database": {
+      "name": "AMG database",
+      "branch": "master",
+      "Download time": "09/28/2022, 12:01:47",
+      "Origin": "Downloaded by DRAM"
+    },
+    "etc_module_database": {
+      "name": "ETC module database",
+      "branch": "master",
+      "Download time": "09/28/2022, 12:01:47",
+      "Origin": "Downloaded by DRAM"
+    }
+  },
+  "log_path": null
+}
diff --git a/mag_annotator/__init__.py b/mag_annotator/__init__.py
index 4e7c72a5..9e0feee7 100644
--- a/mag_annotator/__init__.py
+++ b/mag_annotator/__init__.py
@@ -1 +1 @@
-__version__ = '1.4.3'
+__version__ = '1.4.4'
diff --git a/mag_annotator/database_handler.py b/mag_annotator/database_handler.py
index 78a8bcd7..a94bca9f 100644
--- a/mag_annotator/database_handler.py
+++ b/mag_annotator/database_handler.py
@@ -5,11 +5,10 @@
 import logging
 from shutil import copy2
 import warnings
-from sqlalchemy import create_engine
-from sqlalchemy.orm import sessionmaker
-
 from datetime import datetime
 from functools import partial
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
 
 import pandas as pd
 
diff --git a/mag_annotator/database_processing.py b/mag_annotator/database_processing.py
index 72dff9ea..cee0eefa 100644
--- a/mag_annotator/database_processing.py
+++ b/mag_annotator/database_processing.py
@@ -26,54 +26,6 @@
 DEFAULT_MMMSPRO_DB_NAME = 'db'
 
 
-# KOFAM_CITATION = ("Aramaki T., Blanc-Mathieu R., Endo H., Ohkubo K., Kanehisa "
-#                   "M., Goto S., Ogata H.\nKofamKOALA: KEGG ortholog assignment"
-#                   " based on profile HMM and adaptive score threshold.\nBioinf"
-#                   "ormatics. 2019 Nov 19. pii: btz859. doi: 10.1093/bioinforma"
-#                   "tics/btz859."
-#                   ) # arguably not for kofam but the closest I saw
-# VIRAL_REFSEQ_CITATION = ("Brister JR, Ako-Adjei D, Bao Y, Blinkova O. NCBI vir"
-#                          "al genomes resource. Nucleic Acids Res. 2015 Jan;43("
-#                          "Database issue):D571-7 PubMed PubMedCentral"
-#                          ) # Three options but this one is viral specific
-# KEGG_CITATION = ("Kanehisa, M., Furumichi, M., Sato, Y., Ishiguro-Watanabe, M."
-#                  ", and Tanabe, M.; KEGG: integrating viruses and cellular org"
-#                  "anisms. Nucleic Acids Res. 49, D545-D551 (2021)."
-#                  )
-# PFAM_CITATION = ("Pfam: The protein families database in 2021: J. Mistry, S. C"
-#                  "huguransky, L. Williams, M. Qureshi, G.A. Salazar, E.L.L. So"
-#                  "nnhammer, S.C.E. Tosatto, L. Paladin, S. Raj, L.J. Richardso"
-#                  "n, R.D. Finn, A. Bateman"
-#                  )
-# PEPTIDASE_CITATION = ("Rawlings, N.D., Barrett, A.J., Thomas, P.D., Huang, X.,"
-#                       " Bateman, A. & Finn, R.D. (2018) The MEROPS database of"
-#                       " proteolytic enzymes, their substrates and inhibitors i"
-#                       "n 2017 and a comparison with peptidases in the PANTHER "
-#                       "database. Nucleic Acids Res 46, D624-D632."
-#                       )
-# VOGDB_CITATION = ("Thannesberger, J., Hellinger, H. J., Klymiuk, I., Kastner, M"
-#                   ". T., Rieder, F. J., Schneider, M., ... & Steininger, C. (20"
-#                   "17). Viruses comprise an extensive pool of mobile genetic el"
-#                   "ements in eukaryote cell cultures and human clinical samples"
-#                   ". The FASEB Journal, 31(5), 1987-2000."
-#                   )
-# UNIREF_CITATION = ("Wang Y, Wang Q, Huang H, Huang W, Chen Y, McGarvey PB, Wu C"
-#                    "H, Arighi CN, UniProt Consortium. A crowdsourcing open plat"
-#                    "form for literature curation in UniProt Plos Biology. 19(12"
-#                    "):e3001464 (2021)"
-#                    )
-# DBCAN_CITATION = ("Yin Y*, Mao X*, Yang JC, Chen X, Mao F and Xu Y, dbCAN: a we"
-#                   "b resource for automated carbohydrate-active enzyme annotati"
-#                   "on, Nucleic Acids Res. 2012 Jul;40(Web Server issue):W445-51"
-#                   ) # again a citation for the tool not the db
-# DRAM_CITATION = ("M. Shaffer, M. A. Borton, B. B. McGivern, A. A. Zayed, S. L. "
-#                  "La Rosa, L. M. Solden, P. Liu, A. B. Narrowe, J. Rodríguez-Ra"
-#                  "mos, B. Bolduc et al., \"Dram for distilling microbial metabo"
-#                  "lism to automate the curation of microbiome function,\" Nucle"
-#                  "ic acids research, vol. 48, no. 16, pp. 8883–8900, 2020."
-#                  )
-
-
 KOFAM_CITATION = ("T. Aramaki, R. Blanc-Mathieu, H. Endo, K. Ohkubo, M. Kanehisa"
                   ", S. Goto, and H. Ogata, \"Kofamkoala: Kegg ortholog assignme"
                   "nt based on profile hmm and adaptive score threshold,\" Bioin"
diff --git a/mag_annotator/database_setup.py b/mag_annotator/database_setup.py
index c0c2216d..dbe686c6 100644
--- a/mag_annotator/database_setup.py
+++ b/mag_annotator/database_setup.py
@@ -82,25 +82,6 @@ def serialize(self):
             'dbcan_subfam_ec': self.ec,
         }
 
-
-# DBCAN_SUBFAM_EC_TABLE_NAME = 'dbcan_subfam_ec'
-
-
-# class DbcanSubfamEC(Base):
-#     __tablename__ = DBCAN_SUBFAM_EC_TABLE_NAME
-# 
-#     id = Column(String(30), primary_key=True, nullable=False, index=True)
-# 
-#     description = Column(String(1000))
-# 
-#     @property
-#     def serialize(self):
-#         return {
-#             'dbcan_id': self.id,
-#             'dbcan_subfam_ec': self.description,
-#         }
-
-
 VIRAL_DESCRIPTION_TABLE_NAME = 'viral_description'
 
 
@@ -164,7 +145,6 @@ def create_description_db(db_loc):
                             UNIREF_DESCRIPTION_TABLE_NAME: UniRefDescription,
                             PFAM_DESCRIPTION_TABLE_NAME: PfamDescription,
                             DBCAN_DESCRIPTION_TABLE_NAME: DbcanDescription,
-                            # DBCAN_SUBFAM_EC_TABLE_NAME: DbcanSubfamEC,
                             VIRAL_DESCRIPTION_TABLE_NAME: ViralDescription,
                             PEPTIDASE_DESCRIPTION_TABLE_NAME: PeptidaseDescription,
                             VOGDB_DESCRIPTION_TABLE_NAME: VOGDBDescription}
diff --git a/mag_annotator/summarize_genomes.py b/mag_annotator/summarize_genomes.py
index 3c2d22a9..c58cb130 100644
--- a/mag_annotator/summarize_genomes.py
+++ b/mag_annotator/summarize_genomes.py
@@ -97,7 +97,7 @@ def fill_genome_summary_frame_gene_names(annotations, genome_summary_frame, grou
     for genome, frame in annotations.groupby(groupby_column, sort=False):
         # make dict of identifiers to gene names
         id_gene_dict = defaultdict(list)
-        for gene, ids in get_ids_from_annotations_by_row(frame).iteritems():
+        for gene, ids in get_ids_from_annotations_by_row(frame).items():
             for id_ in ids:
                 id_gene_dict[id_].append(gene)
         # fill in genome summary_frame
@@ -316,7 +316,7 @@ def get_module_step_coverage(kos, module_net):
 def make_module_coverage_df(annotation_df, module_nets):
     kos_to_genes = defaultdict(list)
     ko_id_name = 'kegg_id' if 'kegg_id' in annotation_df.columns else 'ko_id'
-    for gene_id, ko_list in annotation_df[ko_id_name].iteritems():
+    for gene_id, ko_list in annotation_df[ko_id_name].items():
         if type(ko_list) is str:
             for ko in ko_list.split(','):
                 kos_to_genes[ko].append(gene_id)
diff --git a/mag_annotator/summarize_vgfs.py b/mag_annotator/summarize_vgfs.py
index 61df3d88..b0ca6a3b 100644
--- a/mag_annotator/summarize_vgfs.py
+++ b/mag_annotator/summarize_vgfs.py
@@ -104,33 +104,49 @@ def make_viral_stats_table(annotations, potential_amgs, groupby_column='scaffold
     return pd.DataFrame(viral_stats_series).fillna(0)
 
 
-def make_viral_distillate(potential_amgs, genome_summary_form):
-    rows = list()
-    potential_amgs['ids'] = get_ids_from_annotations_by_row(potential_amgs)
-    potential_amgs.iloc[0]
-    logger =  logging.getLogger()
-    check_columns(potential_amgs, logger)
-    missing_count = 0
-    for gene, row in potential_amgs.iterrows():
-        gene_ids = row.ids & set(genome_summary_form.index)
-        if len(gene_ids) > 0:
-            for gene_id in gene_ids:
-                gene_summary = genome_summary_form.loc[gene_id]
-                if type(gene_summary) is pd.Series:
-                    rows.append([gene, row['scaffold'], gene_id, gene_summary['gene_description'],
-                                 gene_summary['sheet'], gene_summary['header'], gene_summary['subheader'],
-                                 gene_summary['module'], row['auxiliary_score'], row['amg_flags']])
-                else:
-                    for sub_gene_id, sub_gene_summary in gene_summary.iterrows():
-                        rows.append([gene, row['scaffold'], gene_id, sub_gene_summary['gene_description'],
-                                     sub_gene_summary['sheet'], sub_gene_summary['header'],
-                                     sub_gene_summary['subheader'], sub_gene_summary['module'],
-                                     row['auxiliary_score'], row['amg_flags']])
-        else:
-            warnings.warn("No distillate information found for gene %s" % gene)
-            rows.append([gene, row['scaffold'], '', '', '', '', '', '', row['auxiliary_score'],
-                         row['amg_flags']])
-    return pd.DataFrame(rows, columns=VIRAL_DISTILLATE_COLUMNS)
+def make_viral_distillate(potential_amgs, genome_summary_form, amg_database, logger):
+    """Make a summary of what in our database makes something a AMG or likly AMG to dram"""
+    # Transform the amg database to make it more workable
+    def look_up_metabolic_info(search_db, match_db, match_db_name):
+        id_genes = set(match_db.index)
+        return (
+            (search_db
+             .assign(gene_id = lambda x: x['ids'].apply(lambda y: y & id_genes))
+             )[['gene_id', 'scaffold', 'auxiliary_score', 'amg_flags']]
+            .explode('gene_id')
+            .dropna(subset=['gene_id'])
+            .merge(match_db, how='left', left_on='gene_id', right_index=True)
+            .assign(gene_id_origin=match_db_name))
+
+    amg_database_frame = (amg_database
+                          .melt(value_vars=['KO', 'EC', 'PFAM'], 
+                                id_vars=['gene', 'module', 'metabolism', 
+                                         'reference', 'verified'],
+                                value_name='gene_id')
+                          .drop('variable', axis=1)
+                          .assign(
+                              gene_id=lambda x: x['gene_id'].apply(
+                                  lambda y: [i.strip() for i in str(y).split(';')]))
+                          .explode('gene_id')
+                          .dropna(subset='gene_id')
+                          .set_index('gene_id')
+                          .rename(columns = {'gene': 'gene_description'})
+                          )
+    potential_amgs = potential_amgs.assign(ids=get_ids_from_annotations_by_row(potential_amgs))
+    metabolic_df = look_up_metabolic_info(potential_amgs, genome_summary_form, 'genome_summary_form')
+    amg_df = look_up_metabolic_info(potential_amgs, amg_database_frame, 'amg_database')
+    missing = list(set(potential_amgs.index) - (set(metabolic_df.index) | (set(amg_df.index)) ))
+    # evaluate what is mising
+    logger.warning(f"No distillate information found for {len(missing)} genes.")
+    logger.debug('\n'.join(missing))
+
+
+    summary = pd.concat([
+        metabolic_df,
+        amg_df,
+        potential_amgs.loc[missing, ['scaffold', 'auxiliary_score', 'amg_flags']]])
+    summary.reset_index(inplace=True, drop=False, names='gene')
+    return summary
 
 
 def make_vgf_order(amgs):
@@ -251,7 +267,11 @@ def summarize_vgfs(input_file, output_dir, groupby_column='scaffold', max_auxili
     viral_genome_stats.to_csv(path.join(output_dir, 'vMAG_stats.tsv'), sep='\t')
     logger.info('Calculated viral genome statistics')
 
-    viral_distillate = make_viral_distillate(potential_amgs, genome_summary_form)
+    viral_distillate = make_viral_distillate(
+        potential_amgs,
+        genome_summary_form,
+        pd.read_csv(database_handler.config["dram_sheets"].get('amg_database'), sep='\t'),
+        logger)
     viral_distillate.to_csv(path.join(output_dir, 'amg_summary.tsv'), sep='\t', index=None)
     logger.info('Generated AMG summary')
 
diff --git a/scripts/DRAM-v.py b/scripts/DRAM-v.py
index 38abc0e9..93b110f3 100644
--- a/scripts/DRAM-v.py
+++ b/scripts/DRAM-v.py
@@ -27,7 +27,7 @@
     annotate_parser.add_argument('-i', '--input_fasta', help="fasta file, output from ", required=True)
     annotate_parser.add_argument('-v', '--virsorter_affi_contigs', help="VirSorter VIRSorter_affi-contigs.tab "
                                                                         "output file")
-    annotate_parser.add_argument('-o', '--output_dir', help="output directory")
+    annotate_parser.add_argument('-o', '--output_dir', help="output directory", required=True)
     annotate_parser.add_argument('--min_contig_size', type=int, default=2500,
                                  help='minimum contig size to be used for gene prediction')
     annotate_parser.add_argument('--split_contigs', action='store_true', default=False,
@@ -78,7 +78,7 @@
 
     # parser for summarizing genomes
     distill_parser.add_argument("-i", "--input_file", help="Annotations path")
-    distill_parser.add_argument("-o", "--output_dir", help="Directory to write summarized genomes")
+    distill_parser.add_argument("-o", "--output_dir", help="Directory to write summarized genomes", required=True)
     distill_parser.add_argument("--groupby_column", help="Column from annotations to group as VGF units",
                                 default='scaffold')
     distill_parser.add_argument("--max_auxiliary_score", type=int, default=3,
@@ -134,7 +134,7 @@
 
     # parser for getting gene neighborhoods
     neighborhood_parser.add_argument("-i", "--input_file", help="Annotations path")
-    neighborhood_parser.add_argument("-o", "--output_dir", help="Directory to write gene neighborhoods")
+    neighborhood_parser.add_argument("-o", "--output_dir", help="Directory to write gene neighborhoods", required=True)
     neighborhood_parser.add_argument("--genes", nargs='*', help="Gene names from DRAM to find neighborhoods around")
     neighborhood_parser.add_argument("--identifiers", nargs='*',
                                      help="Database identifiers assigned by DRAM to find neighborhoods around")
diff --git a/scripts/DRAM.py b/scripts/DRAM.py
index 2430a68d..50228449 100644
--- a/scripts/DRAM.py
+++ b/scripts/DRAM.py
@@ -32,7 +32,7 @@
     annotate_parser.add_argument('-i', '--input_fasta', action='append',
                                  help="fasta file, optionally with wildcards to point to multiple fastas",
                                  required=True)
-    annotate_parser.add_argument('-o', '--output_dir', help="output directory")
+    annotate_parser.add_argument('-o', '--output_dir', help="output directory", required=True)
     annotate_parser.add_argument('--min_contig_size', type=int, default=2500,
                                  help='minimum contig size to be used for gene prediction')
     annotate_parser.add_argument('--config_loc', default=None,
@@ -130,7 +130,7 @@
 
     # parser for summarizing genomes
     distill_parser.add_argument("-i", "--input_file", help="Annotations path")
-    distill_parser.add_argument("-o", "--output_dir", help="Directory to write summarized genomes")
+    distill_parser.add_argument("-o", "--output_dir", required=True, help="Directory to write summarized genomes")
     distill_parser.add_argument('--log_file_path', 
                                        help="A name and loctation for the log file")
     distill_parser.add_argument("--rrna_path", help="rRNA output from annotation")
@@ -179,7 +179,7 @@
 
     # parser for getting gene neighborhoods
     neighborhood_parser.add_argument("-i", "--input_file", help="Annotations path")
-    neighborhood_parser.add_argument("-o", "--output_dir", help="Directory to write gene neighborhoods")
+    neighborhood_parser.add_argument("-o", "--output_dir", required=True, help="Directory to write gene neighborhoods")
     neighborhood_parser.add_argument("--genes", nargs='*', help="Gene names from DRAM to find neighborhoods around")
     neighborhood_parser.add_argument("--identifiers", nargs='*',
                                      help="Database identifiers assigned by DRAM to find neighborhoods around")
@@ -199,7 +199,7 @@
     # TODO: Make it work with append so you can use multiple -i's
     merge_annotations_parser.add_argument("-i", "--input_dirs", help="Path with wildcards pointing to DRAM annotation "
                                                                      "output directories")
-    merge_annotations_parser.add_argument("-o", "--output_dir", help="Path to output merged annotations files")
+    merge_annotations_parser.add_argument("-o", "--output_dir", required=True,  help="Path to output merged annotations files")
     merge_annotations_parser.set_defaults(func=merge_annotations_cmd)
 
     args = parser.parse_args()
diff --git a/tests/test_summarize_vgfs.py b/tests/test_summarize_vgfs.py
index b9e5681f..01639a33 100644
--- a/tests/test_summarize_vgfs.py
+++ b/tests/test_summarize_vgfs.py
@@ -1,28 +1,49 @@
 import pytest
+import logging
+from mag_annotator.utils import setup_logger
 
 import pandas as pd
 import altair as alt
 
-from mag_annotator.summarize_vgfs import filter_to_amgs, get_strand_switches, make_viral_distillate, make_vgf_order, \
-    make_amg_count_column, make_viral_functional_df, make_viral_functional_heatmap
+from mag_annotator.summarize_vgfs import (
+    filter_to_amgs,
+    get_strand_switches,
+    make_viral_distillate,
+    make_vgf_order,
+    make_amg_count_column,
+    make_viral_functional_df,
+    make_viral_functional_heatmap,
+)
 
 
 @pytest.fixture()
 def annotations():
-    return pd.DataFrame([['scaffold_1', 'K99999', 'VTF', 1],
-                         ['scaffold_1', 'K12345', 'M', 4],
-                         ['scaffold_1', 'K00001', 'MFTJ', 3],
-                         ['scaffold_1', 'K11111', 'MKE', 2]],
-                        index=['gene_1', 'gene_2', 'gene_3', 'gene_4'],
-                        columns=['scaffold', 'ko_id', 'amg_flags', 'auxiliary_score'])
+    return pd.DataFrame(
+        [
+            ["scaffold_1", "K99999", "VTF", 1],
+            ["scaffold_1", "K12345", "M", 4],
+            ["scaffold_1", "K00001", "MFTJ", 3],
+            ["scaffold_1", "K11111", "MKE", 2],
+        ],
+        index=["gene_1", "gene_2", "gene_3", "gene_4"],
+        columns=["scaffold", "ko_id", "amg_flags", "auxiliary_score"],
+    )
 
 
 @pytest.fixture()
 def pamgs():
-    return pd.DataFrame([['scaffold_1', 'K00001', 'MFTJ', 3],
-                         ['scaffold_1', 'K11111', 'MKE', 2]],
-                        index=['gene_3', 'gene_4'],
-                        columns=['scaffold', 'ko_id', 'amg_flags', 'auxiliary_score'])
+    return pd.DataFrame(
+        [["scaffold_1", "K00001", "MFTJ", 3], ["scaffold_1", "K11111", "MKE", 2]],
+        index=["gene_3", "gene_4"],
+        columns=["scaffold", "ko_id", "amg_flags", "auxiliary_score"],
+    )
+
+
+@pytest.fixture()
+def logger(tmpdir):
+    logger = logging.getLogger("test_log")
+    setup_logger(logger)
+    return logger
 
 
 def test_filter_to_amgs(annotations, pamgs):
@@ -31,35 +52,129 @@ def test_filter_to_amgs(annotations, pamgs):
 
 
 def test_get_strand_switches():
-    assert get_strand_switches(['+', '-', '+']) == 2
-    assert get_strand_switches(['+', '+', '+']) == 0
-    assert get_strand_switches(['-']) == 0
-    assert get_strand_switches(['+', '+', '-', '-']) == 1
+    assert get_strand_switches(["+", "-", "+"]) == 2
+    assert get_strand_switches(["+", "+", "+"]) == 0
+    assert get_strand_switches(["-"]) == 0
+    assert get_strand_switches(["+", "+", "-", "-"]) == 1
 
 
 @pytest.fixture()
 def genome_summary_frame():
-    return pd.DataFrame(pd.DataFrame([['description', 'module1', 'main', 'header1', 'subheader1'],
-                                      ['description2', 'module2', 'main', 'header1', 'subheader1'],
-                                      ['description3', 'module3', 'other', 'header1', 'subheader1']],
-                                     index=['K00001', 'K12345', 'K00001'],
-                                     columns=['gene_description', 'module', 'sheet', 'header', 'subheader']))
-
-
-def test_make_viral_distillate(pamgs, genome_summary_frame):
-    test_viral_distillate = make_viral_distillate(pamgs, genome_summary_frame)
-    viral_distillate = pd.DataFrame([['gene_3', 'scaffold_1', 'K00001', 'description', 'main', 'header1', 'subheader1',
-                                      'module1', 3, 'MFTJ'],
-                                     ['gene_3', 'scaffold_1', 'K00001', 'description3', 'other', 'header1',
-                                      'subheader1', 'module3', 3, 'MFTJ'],
-                                     ['gene_4', 'scaffold_1', '', '', '', '', '', '', 2, 'MKE']],
-                                    columns=['gene', 'scaffold', 'gene_id', 'gene_description', 'category', 'header',
-                                             'subheader', 'module', 'auxiliary_score', 'amg_flags'])
-    pd.testing.assert_frame_equal(test_viral_distillate, viral_distillate)
+    return pd.DataFrame(
+        pd.DataFrame(
+            [
+                ["description", "module1", "main", "header1", "subheader1"],
+                ["description2", "module2", "main", "header1", "subheader1"],
+                ["description3", "module3", "other", "header1", "subheader1"],
+            ],
+            index=["K00001", "K12345", "K00001"],
+            columns=["gene_description", "module", "sheet", "header", "subheader"],
+        )
+    )
+
+
+def test_make_viral_distillate(pamgs, genome_summary_frame, logger):
+    amg_database = pd.DataFrame(
+        [
+            [
+                "K11111",
+                "gene_4",
+                "gene_4",
+                "d4",
+                "module3",
+                "metabolism3",
+                "reference2",
+                "true",
+            ]
+        ],
+        columns=[
+            "KO",
+            "EC",
+            "PFAM",
+            "gene",
+            "module",
+            "metabolism",
+            "reference",
+            "verified",
+        ],
+    )
+    output_viral_distillate = make_viral_distillate(
+        pamgs, genome_summary_frame, amg_database=amg_database, logger=logger
+    )
+    expected_viral_distillate = pd.DataFrame(
+        [
+            [
+                "gene_3",
+                "scaffold_1",
+                "K00001",
+                "description",
+                "header1",
+                "subheader1",
+                "module1",
+                3,
+                "MFTJ",
+                "main",
+                "genome_summary_form",
+                "",
+                "",
+            ],
+            [
+                "gene_3",
+                "scaffold_1",
+                "K00001",
+                "description3",
+                "header1",
+                "subheader1",
+                "module3",
+                3,
+                "MFTJ",
+                "other",
+                "genome_summary_form",
+                "",
+                "",
+                "",
+            ],
+            [
+                "gene_4",
+                "scaffold_1",
+                "K11111",
+                "d4",
+                "",
+                "",
+                "module3",
+                2,
+                "MKE",
+                "",
+                "amg_database",
+                "metabolism3",
+                "reference2",
+                "true",
+            ],
+        ],
+        columns=[
+            "gene",
+            "scaffold",
+            "gene_id",
+            "gene_description",
+            "header",
+            "subheader",
+            "module",
+            "auxiliary_score",
+            "amg_flags",
+            "sheet",
+            "gene_id_origin",
+            "metabolism",
+            "reference",
+            "verified",
+        ],
+    )
+    output_viral_distillate = output_viral_distillate.sort_index(axis=1).fillna("")
+    expected_viral_distillate = expected_viral_distillate.sort_index(axis=1).fillna("")
+    pd.testing.assert_frame_equal(output_viral_distillate, expected_viral_distillate)
 
 
 def test_make_vgf_order(pamgs):
-    assert make_vgf_order(pamgs) == ['scaffold_1']
+    assert make_vgf_order(pamgs) == ["scaffold_1"]
 
 
 def test_make_amg_count_column(pamgs):
@@ -68,14 +183,26 @@ def test_make_amg_count_column(pamgs):
 
 @pytest.fixture()
 def viral_functional_df():
-    return pd.DataFrame([['main', 'module1', 'gene_3', 'K00001', 'scaffold_1', True],
-                         ['other', 'module3', 'gene_3', 'K00001', 'scaffold_1', True]],
-                        columns=['Category', 'Function', 'AMG Genes', 'Genes Present', 'Contig Name',
-                                 'Present in Contig'])
+    return pd.DataFrame(
+        [
+            ["main", "module1", "gene_3", "K00001", "scaffold_1", True],
+            ["other", "module3", "gene_3", "K00001", "scaffold_1", True],
+        ],
+        columns=[
+            "Category",
+            "Function",
+            "AMG Genes",
+            "Genes Present",
+            "Contig Name",
+            "Present in Contig",
+        ],
+    )
 
 
 def test_make_viral_functional_df(pamgs, genome_summary_frame, viral_functional_df):
-    test_viral_function_df = make_viral_functional_df(pamgs, genome_summary_frame, 'scaffold')
+    test_viral_function_df = make_viral_functional_df(
+        pamgs, genome_summary_frame, "scaffold"
+    )
     pd.testing.assert_frame_equal(test_viral_function_df, viral_functional_df)