Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

style: implement pyproject.toml, flake8 suggestions #26

Merged
merged 2 commits into from
Aug 8, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,8 @@ jobs:
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
# exit-zero treats all errors as warnings
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
- name: Test with pytest
run: |
pytest --nbval doc/tutorial.ipynb --sanitize-with doc/pytest-sanitize.ini --cov=thunor
Expand Down
5 changes: 2 additions & 3 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,9 @@ jobs:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine
python -m pip install --upgrade pip build
- name: Build package
run: |
python setup.py sdist bdist_wheel
python -m build --sdist --wheel .
- name: Publish package distributions to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
16 changes: 10 additions & 6 deletions doc/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,11 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import mock
import os
import sys
import thunor
import pkg_resources
import datetime
sys.path.insert(0, os.path.abspath('../'))

Expand Down Expand Up @@ -59,8 +62,6 @@
copyright = u'2017-' + str(datetime.datetime.now().year) + u' Alex Lubbock'
author = u'Alex Lubbock'

import thunor, pkg_resources

# The full version, including alpha/beta/rc tags.
release = thunor.__version__
# The short X.Y version.
Expand Down Expand Up @@ -152,8 +153,8 @@
# html_logo = None

# The name of an image file (relative to this directory) to use as a favicon of
# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32
# pixels large.
# the docs. This file should be a Windows icon file (.ico) being 16x16 or
# 32x32 pixels large.
#
# html_favicon = None

Expand Down Expand Up @@ -328,10 +329,13 @@
'Miscellaneous'),
]


def setup(app):
app.add_js_file('https://cdnjs.cloudflare.com/ajax/libs/require.js/2.1.10/require.min.js')
app.add_js_file(
'https://cdnjs.cloudflare.com/ajax/libs/'
'require.js/2.1.10/require.min.js'
)

import mock

for mod_name in ('plotly', 'plotly.graph_objs', 'tables'):
sys.modules[mod_name] = mock.MagicMock()
Expand Down
31 changes: 31 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# NOTE(review): flake8 does not natively read configuration from
# pyproject.toml. The [tool.flake8] table below is ignored by plain flake8;
# it requires the pyproject-flake8 plugin (pflake8), or the settings must
# live in setup.cfg / .flake8 instead.

[project]
name = "thunor"
dynamic = ["version", "dependencies"]
description = "Dose response curve and drug induced proliferation (DIP) rate fits and visualisation"
authors = [
    {name = "Alex Lubbock", email = "[email protected]"},
]
requires-python = ">=3.10"
readme = "README.md"
license = {text = "GPL-3.0-only"}
classifiers = [
    "Intended Audience :: Science/Research",
    "Programming Language :: Python",
    "Topic :: Scientific/Engineering :: Bio-Informatics",
    "Topic :: Scientific/Engineering :: Chemistry",
    "Topic :: Scientific/Engineering :: Medical Science Apps.",
]

[project.urls]
Homepage = "https://www.thunor.net"

[build-system]
requires = ["setuptools", "versioneer-518"]
build-backend = "setuptools.build_meta"

# pytest only reads the [tool.pytest.ini_options] table from pyproject.toml;
# a bare [tool.pytest] table is silently ignored, which would drop the
# norecursedirs setting migrated here from setup.cfg's [tool:pytest].
[tool.pytest.ini_options]
norecursedirs = "doc/_build"

[tool.flake8]
extend-ignore = "E203"
max-line-length = 88
3 changes: 0 additions & 3 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,3 @@ versionfile_source = thunor/_version.py
versionfile_build = thunor/_version.py
tag_prefix = v
parentdir_prefix = thunor-

[tool:pytest]
norecursedirs = doc/_build
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@ def main():
author_email='[email protected]',
url='https://www.thunor.net',
packages=['thunor', 'thunor.converters'],
install_requires=['numpy', 'scipy', 'pandas', 'plotly', 'seaborn', 'tables'],
install_requires=['numpy', 'scipy', 'pandas', 'plotly', 'seaborn',
'tables'],
tests_require=['pytest', 'nbval', 'django', 'nbformat'],
cmdclass=versioneer.get_cmdclass(),
zip_safe=True,
Expand Down
9 changes: 6 additions & 3 deletions thunor/_version.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,7 +378,8 @@ def git_pieces_from_vcs(
pieces["distance"] = len(out.split()) # total number of commits

# commit date: see ISO-8601 comment in git_versions_from_keywords()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip()
date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"],
cwd=root)[0].strip()
# Use only the last line. Previous lines may contain GPG signature
# information.
date = date.splitlines()[-1]
Expand Down Expand Up @@ -468,10 +469,12 @@ def render_pep440_pre(pieces: Dict[str, Any]) -> str:
if pieces["closest-tag"]:
if pieces["distance"]:
# update the post release segment
tag_version, post_version = pep440_split_post(pieces["closest-tag"])
tag_version, post_version = pep440_split_post(
pieces["closest-tag"])
rendered = tag_version
if post_version is not None:
rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"])
rendered += ".post%d.dev%d" % (post_version + 1,
pieces["distance"])
else:
rendered += ".post0.dev%d" % (pieces["distance"])
else:
Expand Down
4 changes: 2 additions & 2 deletions thunor/converters/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
from .ctrp2 import convert_ctrp
from .teicher import convert_teicher

__all__ = ['convert_gdsc', 'convert_gdsc_tags', 'convert_ctrp',
'convert_teicher']
__all__ = ["convert_gdsc", "convert_gdsc_tags", "convert_ctrp",
"convert_teicher"]
4 changes: 2 additions & 2 deletions thunor/converters/ctrp2.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,8 +168,8 @@ def convert_ctrp(directory='.',
python -c "from thunor.converters import convert_ctrp; convert_ctrp()"

This script will take several minutes to run, please be patient. It is also
resource-intensive, due to the size of the dataset. We recommend you utilize
the highest-spec machine that you have available.
resource-intensive, due to the size of the dataset. We recommend you
utilize the highest-spec machine that you have available.

This will output a file called (by default) :file:`ctrp_v2.h5`,
which can be opened with :func:`thunor.io.read_hdf()`, or used with Thunor
Expand Down
13 changes: 7 additions & 6 deletions thunor/converters/gdsc.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def import_gdsc(drug_list_file, screen_data_file):
df = screen_data

# Drop the blank wells (no cells, no drugs)
df.drop(list(df.filter(regex='blank\d+')), axis=1, inplace=True)
df.drop(list(df.filter(regex=r'blank\d+')), axis=1, inplace=True)

# Merge in the drug names
df = df.merge(drug_ids, left_on='DRUG_ID', right_index=True)
Expand Down Expand Up @@ -151,7 +151,8 @@ def convert_gdsc_tags(cell_line_file='Cell_Lines_Details.xlsx',
You can run this function at the command line to convert the files;
assuming the downloaded file is in the current directory, simply run::

python -c "from thunor.converters import convert_gdsc_tags; convert_gdsc_tags()"
python -c "from thunor.converters import convert_gdsc_tags; \
convert_gdsc_tags()"

This will output a file called (by default)
:file:`gdsc_cell_line_primary_site_tags.txt`, which can be loaded into
Expand Down Expand Up @@ -203,17 +204,17 @@ def convert_gdsc(drug_list_file='Screened_Compounds.xlsx',
Please note that the layout of wells in each plate after conversion is
arbitrary, since this information is not in the original files.

Please make sure you have the "tables" and "xlrd" python packages installed,
in addition to the standard Thunor Core requirements.
Please make sure you have the "tables" and "xlrd" python packages
installed, in addition to the standard Thunor Core requirements.

You can run this function at the command line to convert the files;
assuming the two files are in the current directory, simply run::

python -c "from thunor.converters import convert_gdsc; convert_gdsc()"

This script will take several minutes to run, please be patient. It is also
resource-intensive, due to the size of the dataset. We recommend you utilize
the highest-spec machine that you have available.
resource-intensive, due to the size of the dataset. We recommend you
utilize the highest-spec machine that you have available.

This will output a file called (by default) :file:`gdsc-v17a.h5`,
which can be opened with :func:`thunor.io.read_hdf()`, or used with Thunor
Expand Down
3 changes: 2 additions & 1 deletion thunor/converters/teicher.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,8 @@ def convert_teicher(directory='.', output_file='teicher.h5'):
Unzip the downloaded file. The dataset can then be converted on the command
line::

python -c "from thunor.converters import convert_teicher; convert_teicher()"
python -c "from thunor.converters import convert_teicher; \
convert_teicher()"

Please note that the layout of wells in each plate after conversion is
arbitrary, since this information is not in the original files.
Expand Down
7 changes: 4 additions & 3 deletions thunor/curve_fit.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,8 @@ def fit_drc(doses, responses, response_std_errs=None, fit_cls=HillCurveLL4,
except TypeError as te:
# This occurs if there are fewer data points than parameters
te_str = str(te)
if 'Improper input:' in te_str or te_str.startswith('The number of func parameters'):
if 'Improper input:' in te_str or te_str.startswith(
'The number of func parameters'):
warnings.warn(te_str)
return None
else:
Expand Down Expand Up @@ -950,7 +951,7 @@ def _generate_label(index):

if not is_viability and include_emax:
divisor = base_params['fit_obj'].apply(lambda fo: fo.divisor if fo
else None)
else None)
base_params['emax_rel'] = base_params['emax'] / divisor
base_params['emax_obs_rel'] = base_params['emax_obs'] / divisor

Expand Down Expand Up @@ -1004,7 +1005,7 @@ def _attach_response_values(df_params, ctrl_dip_data, expt_dip_data,
doses_expt = [d[0] for d in dip_grp.index.get_level_values(
'dose').values]
fit_data = {'dataset_id': grp[0],
'cell_line': grp[1], 'drug': grp[2][0]}
'cell_line': grp[1], 'drug': grp[2][0]}

ctrl_dip_data_cl = \
_get_control_responses(ctrl_dip_data, grp[0], grp[1],
Expand Down
5 changes: 3 additions & 2 deletions thunor/dip.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,9 @@ def dip_rates(df_data, selector_fn=tyson1):

df_assays = df_data.assays.loc[df_data.dip_assay_name]

return ctrl_dips, \
expt_dip_rates(df_data.doses, df_assays, selector_fn=selector_fn)
return ctrl_dips, expt_dip_rates(df_data.doses,
df_assays,
selector_fn=selector_fn)


def expt_dip_rates(df_doses, df_vals, selector_fn=tyson1):
Expand Down
42 changes: 23 additions & 19 deletions thunor/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,9 +122,10 @@ def well_name_to_id(self, well_name, raise_error=True):
raise ValueError('Well name too short')

if len(well_name) > 2 and well_name[1].isalpha():
row_num_mult = ord(well_name[0]) - 64 # one-based
row_num_mult = ord(well_name[0]) - 64 # one-based
if row_num_mult < 0 or row_num_mult > 25:
raise ValueError('First letter is not capital alphanumeric')
raise ValueError(
'First letter is not capital alphanumeric')
row_num = ord(well_name[1]) - 65 # zero-based
row_num += (row_num_mult * 26)
col_num_start = 2
Expand Down Expand Up @@ -156,7 +157,8 @@ def well_iterator(self):
-------
Iterator of dict
Iterator over the wells in the plate. Each well is given as a dict
of 'well' (well ID), 'row' (row character) and 'col' (column number)
of 'well' (well ID), 'row' (row character) and 'col'
(column number)
"""
row_it = iter(np.repeat(list(self.row_iterator()), self.width))
col_it = itertools.cycle(self.col_iterator())
Expand Down Expand Up @@ -501,8 +503,7 @@ def _read_vanderbilt_hts_single_df(file_or_source, plate_width=24,
converters={
'time': _time_parser,
'well': lambda w: pm.well_name_to_id(w),
'expt.date': lambda
d: datetime.strptime(
'expt.date': lambda d: datetime.strptime(
d, '%Y-%m-%d').date()
},
sep=sep
Expand All @@ -517,7 +518,8 @@ def _read_vanderbilt_hts_single_df(file_or_source, plate_width=24,
elif errstr.startswith('invalid literal for int() with base 10'):
raise PlateFileParseException(
'Invalid value for cell count ({})'.format(errstr))
elif errstr.startswith('time data') and 'does not match format' in errstr:
elif errstr.startswith('time data') and \
'does not match format' in errstr:
raise PlateFileParseException(
'Date format should be YYYY-MM-DD ({})'.format(errstr))
else:
Expand All @@ -526,7 +528,8 @@ def _read_vanderbilt_hts_single_df(file_or_source, plate_width=24,
try:
df.set_index(['upid', 'well'], inplace=True)
except KeyError:
raise PlateFileParseException('Please ensure columns "upid" and "well" are present')
raise PlateFileParseException(
'Please ensure columns "upid" and "well" are present')

required_columns = {'upid', 'cell.count', 'time'}
missing_cols = required_columns.difference(set(df.columns))
Expand Down Expand Up @@ -644,17 +647,16 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,

if du != 'M':
raise PlateFileParseException(
'Only supported drug concentration unit is M (not {})'.
format(du))
f'Only supported drug concentration unit is M (not {du})')
drug_nums.append(drug_no)
drug_no += 1

has_annotation = True
if drug_nums:
if 'cell.line' not in df.columns:
raise PlateFileParseException(
'cell.line column is not present, but drug and/or dose columns '
'are present. ' + ANNOTATION_MSG
'cell.line column is not present, but drug and/or dose '
'columns are present. ' + ANNOTATION_MSG
)
else:
if 'cell.line' in df.columns:
Expand Down Expand Up @@ -690,8 +692,8 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,
# Check for duplicate time point definitions
dup_timepoints = df.set_index('time', append=True)
if dup_timepoints.index.duplicated().any():
dups = dup_timepoints.loc[dup_timepoints.index.duplicated(),
:].index.tolist()
dups = dup_timepoints.loc[
dup_timepoints.index.duplicated(), :].index.tolist()
n_dups = len(dups)
first_dup = dups[0]

Expand Down Expand Up @@ -723,7 +725,7 @@ def read_vanderbilt_hts(file_or_source, plate_width=24, plate_height=16,
zip(df_doses["upid"], df_doses["well"])]))
df_doses = df_doses.drop_duplicates(subset='well')
col_renames = {'drug{}.conc'.format(n): 'dose{}'.format(n) for
n in drug_nums}
n in drug_nums}
col_renames.update({
'cell.line': 'cell_line',
'well': 'well_id',
Expand Down Expand Up @@ -1006,7 +1008,7 @@ def read_incucyte(filename_or_buffer, plate_width=24, plate_height=16):
elif hasattr(filename_or_buffer, 'name'):
plate_name = filename_or_buffer.name

def _incucyte_header(filedat):
def _incucyte_header(filedat, plate_name, cell_type):
for line_no, line in enumerate(filedat):
if line.startswith(LABEL_STR):
new_plate_name = line[len(LABEL_STR):].strip()
Expand All @@ -1015,18 +1017,20 @@ def _incucyte_header(filedat):
elif line.startswith(CELL_TYPE_STR):
cell_type = line[len(CELL_TYPE_STR):].strip()
elif line.startswith(TSV_START_STR):
return line_no
return None
return line_no, plate_name, cell_type
return None, plate_name, cell_type

if isinstance(filename_or_buffer, io.BytesIO):
filedat = io.TextIOWrapper(filename_or_buffer,
encoding='utf-8')
line_no = _incucyte_header(filedat)
line_no, plate_name, cell_type = _incucyte_header(
filedat, plate_name, cell_type)
filedat.detach()
filename_or_buffer.seek(0)
else:
with open(filename_or_buffer, 'r') as f:
line_no = _incucyte_header(f)
line_no, plate_name, cell_type = _incucyte_header(
f, plate_name, cell_type)

if line_no is None:
raise PlateFileParseException('Does not appear to be an Incucyte '
Expand Down
Loading
Loading