Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: implement pyproject.toml, flake8 suggestions #26

Merged
merged 2 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ jobs:
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# exit-zero treats all errors as warnings
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
- name: Test with pytest
run: |
pytest --nbval doc/tutorial.ipynb --sanitize-with doc/pytest-sanitize.ini --cov=thunor
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ jobs:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
python -m pip install --upgrade pip build
- name: Build package
run: |
python setup.py sdist bdist_wheel
python -m build --sdist --wheel .
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
16 changes: 10 additions & 6 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import mock
import os
import sys
import thunor
import pkg_resources
import datetime
sys.path.insert(0, os.path.abspath('../'))

Expand Down Expand Up @@ -59,8 +62,6 @@
copyright = u'2017-' + str(datetime.datetime.now().year) + u' Alex Lubbock'
author = u'Alex Lubbock'

import thunor, pkg_resources

# The full version, including alpha/beta/rc tags.
release = thunor.__version__
# The short X.Y version.
Expand Down Expand Up @@ -152,8 +153,8 @@
# html_logo = None

# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# the docs. This file should be a Windows icon file (.ico) being 16x16 or
# 32x32 pixels large.
#
# html_favicon = None

Expand Down Expand Up @@ -328,10 +329,13 @@
'Miscellaneous'),
]


def setup(app):
app.add_js_file('https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js')
app.add_js_file(
'https://cdnjs.cloudflare.com/ajax/libs/'
'require.js/2.1.10/require.min.js'
)

import mock

for mod_name in ('plotly', 'plotly.graph_objs', 'tables'):
sys.modules[mod_name] = mock.MagicMock()
Expand Down
31 changes: 31 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# NOTE(review): flake8 does not natively read configuration from
# pyproject.toml. The [tool.flake8] table below is ignored by plain flake8;
# it requires the pyproject-flake8 plugin (pflake8), or the settings must
# live in setup.cfg / .flake8 instead.

[project]
name = "thunor"
dynamic = ["version", "dependencies"]
description = "Dose response curve and drug induced proliferation (DIP) rate fits and visualisation"
authors = [
    {name = "Alex Lubbock", email = "[email protected]"},
]
requires-python = ">=3.10"
readme = "README.md"
license = {text = "GPL-3.0-only"}
classifiers = [
    "Intended Audience :: Science/Research",
    "Programming Language :: Python",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
    "Topic :: Scientific/Engineering :: Chemistry",
    "Topic :: Scientific/Engineering :: Medical Science Apps.",
]

[project.urls]
Homepage = "https://www.thunor.net"

[build-system]
requires = ["setuptools", "versioneer-518"]
build-backend = "setuptools.build_meta"

# pytest only reads the [tool.pytest.ini_options] table from pyproject.toml;
# a bare [tool.pytest] table is silently ignored, which would drop the
# norecursedirs setting migrated here from setup.cfg's [tool:pytest].
[tool.pytest.ini_options]
norecursedirs = "doc/_build"

[tool.flake8]
extend-ignore = "E203"
max-line-length = 88
3 changes: 0 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,3 @@ versionfile_source = thunor/_version.py
versionfile_build = thunor/_version.py
tag_prefix = v
parentdir_prefix = thunor-

[tool:pytest]
norecursedirs = doc/_build
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def main():
author_email='[email protected]',
url='https://www.thunor.net',
packages=['thunor', 'thunor.converters'],
install_requires=['numpy', 'scipy', 'pandas', 'plotly', 'seaborn', 'tables'],
install_requires=['numpy', 'scipy', 'pandas', 'plotly', 'seaborn',
'tables'],
tests_require=['pytest', 'nbval', 'django', 'nbformat'],
cmdclass=versioneer.get_cmdclass(),
zip_safe=True,
Expand Down
9 changes: 6 additions & 3 deletions thunor/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ def git_pieces_from_vcs(
pieces["distance"] = len(out.split()) # total number of commits

# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"],
cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
Expand Down Expand Up @@ -468,10 +469,12 @@ def render_pep440_pre(pieces: Dict[str, Any]) -> str:
if pieces["closest-tag"]:
if pieces["distance"]:
# update the post release segment
tag_version, post_version = pep440_split_post(pieces["closest-tag"])
tag_version, post_version = pep440_split_post(
pieces["closest-tag"])
rendered = tag_version
if post_version is not None:
rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
rendered += ".post%d.dev%d" % (post_version + 1,
pieces["distance"])
else:
rendered += ".post0.dev%d" % (pieces["distance"])
else:
Expand Down
4 changes: 2 additions & 2 deletions thunor/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
from .ctrp2 import convert_ctrp
from .teicher import convert_teicher

__all__ = ['convert_gdsc', 'convert_gdsc_tags', 'convert_ctrp',
'convert_teicher']
__all__ = ["convert_gdsc", "convert_gdsc_tags", "convert_ctrp",
"convert_teicher"]
4 changes: 2 additions & 2 deletions thunor/converters/ctrp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def convert_ctrp(directory='.',
python -c "from thunor.converters import convert_ctrp; convert_ctrp()"

This script will take several minutes to run, please be patient. It is also
resource-intensive, due to the size of the dataset. We recommend you utilize
the highest-spec machine that you have available.
resource-intensive, due to the size of the dataset. We recommend you
utilize the highest-spec machine that you have available.

This will output a file called (by default) :file:`ctrp_v2.h5`,
which can be opened with :func:`thunor.io.read_hdf()`, or used with Thunor
Expand Down
13 changes: 7 additions & 6 deletions thunor/converters/gdsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def import_gdsc(drug_list_file, screen_data_file):
df = screen_data

# Drop the blank wells (no cells, no drugs)
df.drop(list(df.filter(regex='blank\d+')), axis=1, inplace=True)
df.drop(list(df.filter(regex=r'blank\d+')), axis=1, inplace=True)

# Merge in the drug names
df = df.merge(drug_ids, left_on='DRUG_ID', right_index=True)
Expand Down Expand Up @@ -151,7 +151,8 @@ def convert_gdsc_tags(cell_line_file='Cell_Lines_Details.xlsx',
You can run this function at the command line to convert the files;
assuming the downloaded file is in the current directory, simply run::

python -c "from thunor.converters import convert_gdsc_tags; convert_gdsc_tags()"
python -c "from thunor.converters import convert_gdsc_tags; \
convert_gdsc_tags()"

This will output a file called (by default)
:file:`gdsc_cell_line_primary_site_tags.txt`, which can be loaded into
Expand Down Expand Up @@ -203,17 +204,17 @@ def convert_gdsc(drug_list_file='Screened_Compounds.xlsx',
Please note that the layout of wells in each plate after conversion is
arbitrary, since this information is not in the original files.

Please make sure you have the "tables" and "xlrd" python packages installed,
in addition to the standard Thunor Core requirements.
Please make sure you have the "tables" and "xlrd" python packages
installed, in addition to the standard Thunor Core requirements.

You can run this function at the command line to convert the files;
assuming the two files are in the current directory, simply run::

python -c "from thunor.converters import convert_gdsc; convert_gdsc()"

This script will take several minutes to run, please be patient. It is also
resource-intensive, due to the size of the dataset. We recommend you utilize
the highest-spec machine that you have available.
resource-intensive, due to the size of the dataset. We recommend you
utilize the highest-spec machine that you have available.

This will output a file called (by default) :file:`gdsc-v17a.h5`,
which can be opened with :func:`thunor.io.read_hdf()`, or used with Thunor
Expand Down
3 changes: 2 additions & 1 deletion thunor/converters/teicher.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def convert_teicher(directory='.', output_file='teicher.h5'):
Unzip the downloaded file. The dataset can then be converted on the command
line::

python -c "from thunor.converters import convert_teicher; convert_teicher()"
python -c "from thunor.converters import convert_teicher; \
convert_teicher()"

Please note that the layout of wells in each plate after conversion is
arbitrary, since this information is not in the original files.
Expand Down
7 changes: 4 additions & 3 deletions thunor/curve_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,8 @@ def fit_drc(doses, responses, response_std_errs=None, fit_cls=HillCurveLL4,
except TypeError as te:
# This occurs if there are fewer data points than parameters
te_str = str(te)
if 'Improper input:' in te_str or te_str.startswith('The number of func parameters'):
if 'Improper input:' in te_str or te_str.startswith(
'The number of func parameters'):
warnings.warn(te_str)
return None
else:
Expand Down Expand Up @@ -950,7 +951,7 @@ def _generate_label(index):

if not is_viability and include_emax:
divisor = base_params['fit_obj'].apply(lambda fo: fo.divisor if fo
else None)
else None)
base_params['emax_rel'] = base_params['emax'] / divisor
base_params['emax_obs_rel'] = base_params['emax_obs'] / divisor

Expand Down Expand Up @@ -1004,7 +1005,7 @@ def _attach_response_values(df_params, ctrl_dip_data, expt_dip_data,
doses_expt = [d[0] for d in dip_grp.index.get_level_values(
'dose').values]
fit_data = {'dataset_id': grp[0],
'cell_line': grp[1], 'drug': grp[2][0]}
'cell_line': grp[1], 'drug': grp[2][0]}

ctrl_dip_data_cl = \
_get_control_responses(ctrl_dip_data, grp[0], grp[1],
Expand Down
5 changes: 3 additions & 2 deletions thunor/dip.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ def dip_rates(df_data, selector_fn=tyson1):

df_assays = df_data.assays.loc[df_data.dip_assay_name]

return ctrl_dips, \
expt_dip_rates(df_data.doses, df_assays, selector_fn=selector_fn)
return ctrl_dips, expt_dip_rates(df_data.doses,
df_assays,
selector_fn=selector_fn)


def expt_dip_rates(df_doses, df_vals, selector_fn=tyson1):
Expand Down
42 changes: 23 additions & 19 deletions thunor/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def well_name_to_id(self, well_name, raise_error=True):
raise ValueError('Well name too short')

if len(well_name) > 2 and well_name[1].isalpha():
row_num_mult = ord(well_name[0]) - 64 # one-based
row_num_mult = ord(well_name[0]) - 64 # one-based
if row_num_mult < 0 or row_num_mult > 25:
raise ValueError('First letter is not capital alphanumeric')
raise ValueError(
'First letter is not capital alphanumeric')
row_num = ord(well_name[1]) - 65 # zero-based
row_num += (row_num_mult * 26)
col_num_start = 2
Expand Down Expand Up @@ -156,7 +157,8 @@ def well_iterator(self):
-------
Iterator of dict
Iterator over the wells in the plate. Each well is given as a dict
of 'well' (well ID), 'row' (row character) and 'col' (column number)
of 'well' (well ID), 'row' (row character) and 'col'
(column number)
"""
row_it = iter(np.repeat(list(self.row_iterator()), self.width))
col_it = itertools.cycle(self.col_iterator())
Expand Down Expand Up @@ -501,8 +503,7 @@ def _read_vanderbilt_hts_single_df(file_or_source, plate_width=24,
converters={
'time': _time_parser,
'well': lambda w: pm.well_name_to_id(w),
'expt.date': lambda
d: datetime.strptime(
'expt.date': lambda d: datetime.strptime(
d, '%Y-%m-%d').date()
},
sep=sep
Expand All @@ -517,7 +518,8 @@ def _read_vanderbilt_hts_single_df(file_or_source, plate_width=24,
elif errstr.startswith('invalid literal for int() with base 10'):
raise PlateFileParseException(
'Invalid value for cell count ({})'.format(errstr))
elif errstr.startswith('time data') and 'does not match format' in errstr:
elif errstr.startswith('time data') and \
'does not match format' in errstr:
raise PlateFileParseException(
'Date format should be YYYY-MM-DD ({})'.format(errstr))
else:
Expand All @@ -526,7 +528,8 @@ def _read_vanderbilt_hts_single_df(file_or_source, plate_width=24,
try:
df.set_index(['upid', 'well'], inplace=True)
except KeyError:
raise PlateFileParseException('Please ensure columns "upid" and "well" are present')
raise PlateFileParseException(
'Please ensure columns "upid" and "well" are present')

required_columns = {'upid', 'cell.count', 'time'}
missing_cols = required_columns.difference(set(df.columns))
Expand Down Expand Up @@ -644,17 +647,16 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,

if du != 'M':
raise PlateFileParseException(
'Only supported drug concentration unit is M (not {})'.
format(du))
f'Only supported drug concentration unit is M (not {du})')
drug_nums.append(drug_no)
drug_no += 1

has_annotation = True
if drug_nums:
if 'cell.line' not in df.columns:
raise PlateFileParseException(
'cell.line column is not present, but drug and/or dose columns '
'are present. ' + ANNOTATION_MSG
'cell.line column is not present, but drug and/or dose '
'columns are present. ' + ANNOTATION_MSG
)
else:
if 'cell.line' in df.columns:
Expand Down Expand Up @@ -690,8 +692,8 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,
# Check for duplicate time point definitions
dup_timepoints = df.set_index('time', append=True)
if dup_timepoints.index.duplicated().any():
dups = dup_timepoints.loc[dup_timepoints.index.duplicated(),
:].index.tolist()
dups = dup_timepoints.loc[
dup_timepoints.index.duplicated(), :].index.tolist()
n_dups = len(dups)
first_dup = dups[0]

Expand Down Expand Up @@ -723,7 +725,7 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,
zip(df_doses["upid"], df_doses["well"])]))
df_doses = df_doses.drop_duplicates(subset='well')
col_renames = {'drug{}.conc'.format(n): 'dose{}'.format(n) for
n in drug_nums}
n in drug_nums}
col_renames.update({
'cell.line': 'cell_line',
'well': 'well_id',
Expand Down Expand Up @@ -1006,7 +1008,7 @@ def read_incucyte(filename_or_buffer, plate_width=24, plate_height=16):
elif hasattr(filename_or_buffer, 'name'):
plate_name = filename_or_buffer.name

def _incucyte_header(filedat):
def _incucyte_header(filedat, plate_name, cell_type):
for line_no, line in enumerate(filedat):
if line.startswith(LABEL_STR):
new_plate_name = line[len(LABEL_STR):].strip()
Expand All @@ -1015,18 +1017,20 @@ def _incucyte_header(filedat):
elif line.startswith(CELL_TYPE_STR):
cell_type = line[len(CELL_TYPE_STR):].strip()
elif line.startswith(TSV_START_STR):
return line_no
return None
return line_no, plate_name, cell_type
return None, plate_name, cell_type

if isinstance(filename_or_buffer, io.BytesIO):
filedat = io.TextIOWrapper(filename_or_buffer,
encoding='utf-8')
line_no = _incucyte_header(filedat)
line_no, plate_name, cell_type = _incucyte_header(
filedat, plate_name, cell_type)
filedat.detach()
filename_or_buffer.seek(0)
else:
with open(filename_or_buffer, 'r') as f:
line_no = _incucyte_header(f)
line_no, plate_name, cell_type = _incucyte_header(
f, plate_name, cell_type)

if line_no is None:
raise PlateFileParseException('Does not appear to be an Incucyte '
Expand Down
Loading
Loading