Skip to content

Commit

Permalink
Modified output file, added exclusion, changed library choosing
Browse files Browse the repository at this point in the history
  • Loading branch information
robsv committed Jul 12, 2023
1 parent 73e22f8 commit 8d21e84
Showing 1 changed file with 69 additions and 30 deletions.
99 changes: 69 additions & 30 deletions bin/publishing_check.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,23 @@
''' This program will...
''' This program will check sample IDs from a publishing database against sample IDs
from the publishedURL table (MongoDB neuronbridge). Any samples in the publishing
database but not neuronbridge will be reported.
'''
__version__ = '0.0.1'
__version__ = '1.0.0'

import argparse
from operator import attrgetter
import sys
import inquirer
import MySQLdb
from tqdm import tqdm
import jrc_common.jrc_common as JRC
import neuronbridge_lib as NB

# Database
# DB holds database handles keyed by database name; it is populated elsewhere
# (presumably in initialize_program - TODO confirm) and is read below as
# DB[ARG.DATABASE]['cursor'] for SQL access and DB['neuronbridge'] for MongoDB.
DB = {}
# COLL holds MongoDB collection handles keyed by collection name
# (e.g. COLL['publishedURL']).
COLL = {}
# READ maps a query name to a SQL SELECT statement run against image_data_mv.
# Each "%s" is a placeholder whose value is passed separately as the parameter
# tuple of cursor.execute(); it is not filled in by string formatting.
READ = {"RELEASE": "SELECT DISTINCT slide_code,workstation_sample_id FROM image_data_mv "
+ "WHERE alps_release=%s",
"SAMPLE": "SELECT DISTINCT slide_code,workstation_sample_id FROM image_data_mv "
+ "WHERE workstation_sample_id=%s",
READ = {"RELEASE": "SELECT DISTINCT line,slide_code,workstation_sample_id,alps_release "
+ "FROM image_data_mv WHERE alps_release=%s",
"SAMPLE": "SELECT DISTINCT line,slide_code,workstation_sample_id,alps_release "
+ "FROM image_data_mv WHERE workstation_sample_id=%s",
"ALL_SAMPLES": "SELECT DISTINCT workstation_sample_id,alps_release FROM image_data_mv",
}

Expand Down Expand Up @@ -45,7 +46,7 @@ def initialize_program():
terminate_program(err)
# Database
for source in (ARG.DATABASE, "neuronbridge"):
manifold = "staging" if source == ARG.DATABASE else ARG.MANIFOLD
manifold = "prod" if source == ARG.DATABASE else ARG.MANIFOLD
dbo = attrgetter(f"{source}.{manifold}.read")(dbconfig)
LOGGER.info("Connecting to %s %s on %s as %s", dbo.name, ARG.MANIFOLD, dbo.host, dbo.user)
try:
Expand All @@ -58,53 +59,87 @@ def initialize_program():
terminate_program(err)
COLL['publishedURL'] = DB['neuronbridge'].publishedURL
# Parms
if not ARG.LIBRARY:
ARG.LIBRARY = NB.get_library(DB['neuronbridge'].neuronMetadata, "flyem")
if ARG.LIBRARY:
ARG.LIBRARY = ARG.LIBRARY.split(",")
else:
defaults = ["flylight_split_gal4_published"]
if ARG.DATABASE == "gen1mcfo":
defaults = ["flylight_annotator_gen1_mcfo_published", "flylight_gen1_mcfo_published"]
results = DB['neuronbridge'].neuronMetadata.distinct("libraryName")
libraries = []
for row in results:
if "flyem" not in row:
libraries.append(row)
libraries.sort()
quest = [inquirer.Checkbox('checklist',
message='Select libraries to process',
choices=libraries, default=defaults)]
ARG.LIBRARY = inquirer.prompt(quest)['checklist']


def analyze_results(published, release_size, nb):
''' Compare published sample IDs to those in NeuronBridge
def missing_from_nb(missing_rel, published, nbd):
    ''' Find samples that are in the publishing database but not publishedURL
        Keyword arguments:
          missing_rel: dict to populate in place (key=release, value=list of sample IDs)
          published: dict of published sample IDs (value=release)
          nbd: dict of samples in NeuronBridge (looked up as "Sample#" + sample ID)
        Returns:
          The same missing_rel dict of releases (value=list of sample IDs)
    '''
    for smp, rel in published.items():
        # A sample counts as present only if its "Sample#"-prefixed ID is a key in nbd
        if "Sample#" + smp not in nbd:
            missing_rel.setdefault(rel, []).append(smp)
    # Returned as well as mutated so the documented return value is honored;
    # callers that ignore the return value keep working unchanged.
    return missing_rel


def analyze_results(published, release_size, nbd):
    ''' Compare published sample IDs to those in NeuronBridge and report the
        differences on stdout and in the file missing_samples.txt
        Keyword arguments:
          published: dict of published sample IDs (value=release)
          release_size: dict of published releases (value= #samples)
          nbd: dict of samples in NeuronBridge
        Returns:
          None
    '''
    missing_rel = {}
    missing_from_nb(missing_rel, published, nbd)
    with open("missing_samples.txt", "w", encoding="ascii") as outstream:
        # sorted() yields a separate list of keys, so deleting entries from
        # missing_rel inside the loop is safe.
        for rel in sorted(missing_rel):
            missing = len(missing_rel[rel])
            if missing != release_size[rel]:
                print(f"{rel} is missing {missing}/{release_size[rel]} samples")
                continue
            # Every sample of this release is missing: drop it so its samples
            # are not written a second time by the per-sample pass below.
            del missing_rel[rel]
            if ARG.EXCLUDENEW:
                continue
            print(f"{rel} is not in NeuronBridge")
            _write_sample_rows(outstream, 'RELEASE', rel)
        # Releases still in missing_rel are only partially missing
        for smplist in missing_rel.values():
            for smp in smplist:
                _write_sample_rows(outstream, 'SAMPLE', smp)


def _write_sample_rows(outstream, query, value):
    ''' Run one publishing database query and write a tab-delimited line
        (line, slide code, sample ID, release) for every row returned
        Keyword arguments:
          outstream: open text file handle to write to
          query: key of the SQL statement in READ
          value: single value bound to the statement's placeholder
        Returns:
          None
    '''
    try:
        DB[ARG.DATABASE]['cursor'].execute(READ[query], (value,))
        rows = DB[ARG.DATABASE]['cursor'].fetchall()
    except MySQLdb.Error as err:
        # NOTE(review): terminate_program is assumed not to return - confirm;
        # otherwise "rows" would be unbound below.
        terminate_program(JRC.sql_error(err))
    for row in rows:
        outstream.write(f"{row['line']}\t{row['slide_code']}\t"
                        + f"{row['workstation_sample_id']}\t{row['alps_release']}\n")


def perform_checks():
# Publishing
''' Prepare comparison dicts and perform checks
Keyword arguments:
None
Returns:
None
'''
try:
DB[ARG.DATABASE]['cursor'].execute(READ['ALL_SAMPLES'])
rows = DB[ARG.DATABASE]['cursor'].fetchall()
Expand All @@ -120,13 +155,14 @@ def perform_checks():
else:
release_size[row['alps_release']] += 1
# NeuronBridge
payload = {"libraryName": ARG.LIBRARY}
payload = {"libraryName": {"$in": ARG.LIBRARY}}
rows = COLL['publishedURL'].distinct("sampleRef", payload)
nb = {}
nbd = {}
for row in rows:
nb[row] = True
LOGGER.info("Found %d sample%s in NeuronBridge", len(nb), "" if len(nb) == 1 else "s")
analyze_results(published, release_size, nb)
nbd[row] = True
LOGGER.info("Found %d sample%s in NeuronBridge", len(nbd), "" if len(nbd) == 1 else "s")
# Report
analyze_results(published, release_size, nbd)


if __name__ == '__main__':
Expand All @@ -135,9 +171,12 @@ def perform_checks():
PARSER.add_argument('--library', dest='LIBRARY', action='store',
default='', help='color depth library')
PARSER.add_argument('--database', dest='DATABASE', action='store',
default='mbew', choices=['mbew', 'gen1mcfo', 'raw'], help='Publishing database')
default='mbew', choices=['mbew', 'gen1mcfo', 'raw'],
help='Publishing database')
PARSER.add_argument('--manifold', dest='MANIFOLD', action='store',
default='prod', choices=['dev', 'prod'], help='MongoDB manifold')
PARSER.add_argument('--excludenew', dest='EXCLUDENEW', action='store_true',
default=False, help='Exclude newly published releases')
PARSER.add_argument('--verbose', dest='VERBOSE', action='store_true',
default=False, help='Flag, Chatty')
PARSER.add_argument('--debug', dest='DEBUG', action='store_true',
Expand Down

0 comments on commit 8d21e84

Please sign in to comment.