Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/BAMresearch/McSAS3
Browse files Browse the repository at this point in the history
  • Loading branch information
toqduj committed Jan 4, 2023
2 parents 5905566 + a47b993 commit c1b4b8a
Show file tree
Hide file tree
Showing 15 changed files with 205 additions and 276 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ McSAS3 (a refactored version of the original McSAS) fits scattering patterns to
6. Some bugs remain. Feel free to add bugs to the issues. They will be fixed as time permits.

## Installation
This package can be installed by ensuring that 1) you have SasModels (pip install sasmodels) and 2) the most recent 21.4+ version of attrs. After that, you can do
This package can be installed by ensuring that 1) you have SasModels (pip install sasmodels), 2) attrs version 21.4 or newer, and 3) pandas. After that, you can do
```git clone https://github.com/BAMresearch/McSAS3.git``` in an appropriate location to install McSAS3
On Windows, if you want to use the sasmodels library, it is highly recommended to run ```pip install tinycc``` so that there's a compatible compiler available.

Expand Down
61 changes: 22 additions & 39 deletions mcsas3/McData.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from ast import Str
from typing import Optional
import numpy as np
import pandas
import h5py
from mcsas3.McHDF import McHDF
from pathlib import Path

# todo use attrs to @define a McData dataclass

class McData(McHDF):
"""
Expand Down Expand Up @@ -61,11 +64,11 @@ class McData(McHDF):

def __init__(
self,
df: pandas.DataFrame = None,
loadFromFile: Path = None,
resultIndex=1,
**kwargs,
):
df: Optional[pandas.DataFrame] = None,
loadFromFile: Optional[Path] = None,
resultIndex:int=1,
**kwargs:dict,
)-> None:

# reset everything so we're sure not to inherit anything from elsewhere:
self.filename = None # input filename
Expand Down Expand Up @@ -94,16 +97,16 @@ def __init__(
if loadFromFile is not None:
self.load(loadFromFile)

def processKwargs(self, **kwargs):
def processKwargs(self, **kwargs:dict)->None:
for key, value in kwargs.items():
assert key in self.storeKeys, "Key {} is not a valid option".format(key)
setattr(self, key, value)

def linkMeasData(self, measDataLink=None):
def linkMeasData(self, measDataLink:str=None)-> None:
assert False, "defined in 1D and 2D subclasses"
pass

def from_file(self, filename=None):
def from_file(self, filename:Optional[Path]=None)->None:
if filename is None:
assert (
self.filename is not None
Expand Down Expand Up @@ -133,15 +136,15 @@ def from_file(self, filename=None):
False
), "Input file type could not be determined. Use from_pandas to load a dataframe or use df = [DataFrame] in input, or use 'loader' = 'from_pdh' or 'from_csv' in input"

def from_pandas(self, df=None):
def from_pandas(self, df:pandas.DataFrame=None)->None:
assert False, "defined in 1D and 2D subclasses"
pass

def from_csv(self, filename=None, csvargs=None):
def from_csv(self, filename:Path=None, csvargs=None)->None:
assert False, "defined in 1D and 2D subclasses"
pass

def from_pdh(self, filename=None):
def from_pdh(self, filename:Path=None)->None:
assert False, "defined in 1D subclass only"
pass

Expand All @@ -151,7 +154,7 @@ def from_pdh(self, filename=None):
# pass

# universal reader for 1D and 2D!
def from_nexus(self, filename=None):
def from_nexus(self, filename:Optional[Path]=None)->None:
# optionally, path can be defined as a dict to point at Q, I and ISigma entries.
def objBytesToStr(inObject):
outObject = inObject
Expand Down Expand Up @@ -275,22 +278,22 @@ def objBytesToStr(inObject):
self.rawData = pandas.DataFrame(data=self.rawData)
self.prepare()

def is2D(self):
def is2D(self)->bool:
return self.rawData2D is not None

def clip(self):
def clip(self)->None:
assert False, "defined in 1D and 2D subclasses"
pass

def omit(self):
def omit(self)->None:
assert False, "defined in the 1D and (maybe) 2D subclasses"
pass

def reBin(self):
def reBin(self)->None:
assert False, "defined in 1D and 2D subclasses"
pass

def prepare(self):
def prepare(self)->None:
"""runs the clipping and binning (in that order), populates clippedData and binnedData"""
self.clip()
self.omit()
Expand All @@ -300,7 +303,7 @@ def prepare(self):
self.binnedData = self.clippedData.copy()
self.linkMeasData()

def store(self, filename=None, path=None):
def store(self, filename:Path, path:Optional[str]=None)->None: # path:str|None
"""stores the settings in an output file (HDF5)"""
if path is None:
path = f"{self.nxsEntryPoint}mcdata/"
Expand All @@ -309,7 +312,7 @@ def store(self, filename=None, path=None):
value = getattr(self, key, None)
self._HDFstoreKV(filename=filename, path=path, key=key, value=value)

def load(self, filename: Path = None, path=None):
def load(self, filename: Path, path:Optional[str]=None)->None:
if path is None:
path = f"{self.nxsEntryPoint}mcdata/"
assert filename is not None
Expand Down Expand Up @@ -342,24 +345,4 @@ def load(self, filename: Path = None, path=None):
self.from_file() # try loading the data from the original file
self.prepare()

# ### functions to extend the use of McData class to simulated model data
# def polate (self):
# """ Interpolates and extrapolates the data, for use with scale """
# assert False, "defined in 1D or 2D subclass"
# pass

# def interpolate(self, method = None):
# """ Interpolates the data, for use with scale """
# assert False, "defined in 1D or 2D subclass"
# pass

# def scale(self, Rscale:float = 1.):
# """ scales the dataset in Q to "pretend" to be an isoaxial R-scaling"""
# assert False, "defined in 1D or 2D subclass"
# pass

# def extrapolate(self, method = None):
# """ extrapolates the dataset beyond min and max (for use with scale) """
# assert False, "defined in 1D or 2D subclass"
# pass

19 changes: 10 additions & 9 deletions mcsas3/McData1D.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from typing import Optional
import numpy as np
import pandas
from .McData import McData
Expand All @@ -14,8 +15,8 @@ class McData1D(McData):
omitQRanges = None # to skip or omit unwanted data ranges, for example with sharp XRD peaks

def __init__(
self, df: pandas.DataFrame = None, loadFromFile: Path = None, resultIndex = 1, **kwargs
):
self, df: Optional[pandas.DataFrame] = None, loadFromFile: Optional[Path] = None, resultIndex:int = 1, **kwargs:dict
)-> None:
super().__init__(loadFromFile=loadFromFile, resultIndex=resultIndex, **kwargs)
self.csvargs = {
"sep": r"\s+",
Expand All @@ -38,7 +39,7 @@ def __init__(
self.from_file(self.filename)
# link measData to the requested value

def linkMeasData(self, measDataLink=None):
def linkMeasData(self, measDataLink: Optional[str]=None)->None: # measDataLink:str|None
if measDataLink is None:
measDataLink = self.measDataLink
assert measDataLink in [
Expand All @@ -53,7 +54,7 @@ def linkMeasData(self, measDataLink=None):
ISigma=measDataObj.ISigma.values,
)

def from_pdh(self, filename=None):
def from_pdh(self, filename:Path)->None:
"""reads from a PDH file, re-uses Ingo Bressler's code from the notebook example"""
assert filename is not None, "from_pdh requires an input filename of a PDH file"
skiprows, nrows = 5, -1
Expand All @@ -65,7 +66,7 @@ def from_pdh(self, filename=None):
csvargs.update({"skiprows": skiprows, "nrows": nrows[0] - skiprows})
self.from_pandas(pandas.read_csv(filename, **csvargs))

def from_pandas(self, df=None):
def from_pandas(self, df:pandas.DataFrame)->None:
"""uses a dataframe as input, should contain 'Q', 'I', and 'ISigma'"""
assert isinstance(
df, pandas.DataFrame
Expand All @@ -80,14 +81,14 @@ def from_pandas(self, df=None):
self.rawData = df
self.prepare()

def from_csv(self, filename, csvargs={}):
def from_csv(self, filename:Path, csvargs:dict={})->None:
"""reads from a three-column csv file, takes pandas from_csv arguments"""
assert filename is not None, "from_csv requires an input filename of a csv file"
localCsvargs = self.csvargs.copy()
localCsvargs.update(csvargs)
self.from_pandas(pandas.read_csv(filename, **localCsvargs))

def clip(self):
def clip(self)->None:
self.clippedData = (
self.rawData.query(f"{self.dataRange[0]} <= Q < {self.dataRange[1]}")
.dropna()
Expand All @@ -97,7 +98,7 @@ def clip(self):
len(self.clippedData) != 0
), "Data clipping range too small, no datapoints found!"

def omit(self):
def omit(self)->None:
# this can skip/omit unwanted ranges of data (for example a data range with an unwanted XRD peak in it)
# requires an "omitQRanges" list of [[qmin, qmax]]-data ranges to omit

Expand All @@ -112,7 +113,7 @@ def omit(self):
inplace=True
)

def reBin(self, nbins=None, IEMin=0.01, QEMin=0.01):
def reBin(self, nbins:Optional[int]=None, IEMin:float=0.01, QEMin:float=0.01) -> None: # nbins:int|None
"""Unweighted rebinning funcionality with extended uncertainty estimation, adapted from the datamerge methods, as implemented in Paulina's notebook of spring 2020"""
if nbins is None:
nbins = self.nbins
Expand Down
45 changes: 12 additions & 33 deletions mcsas3/McData2D.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from typing import Optional
import numpy as np
import pandas
from .McData import McData
import h5py
import logging
from pathlib import Path

class McData2D(McData):
"""subclass for managing 2D datasets. Copied from 1D dataset handler, not every functionality is enabled"""
Expand All @@ -20,13 +22,9 @@ class McData2D(McData):
0,
] # nudge in direction 0 and 1 in case of misaligned centers. Applied to measData

def __init__(self, df=None, loadFromFile=None, resultIndex=1, **kwargs):
def __init__(self, df=None, loadFromFile=None, resultIndex:int=1, **kwargs:dict)-> None:
super().__init__(resultIndex=resultIndex, **kwargs)
self.csvargs = {
"sep": r"\s+",
"header": None,
"names": ["Q", "I", "ISigma"],
} # default for 1D, overwritten in subclass
self.csvargs = {} # not sure you'd want to load 2D from a CSV.... though I've seen stranger things
self.dataRange = [0, np.inf] # min-max for data range to fit
self.orthoQ1Range = [0, np.inf]
self.orthoQ0Range = [0, np.inf]
Expand All @@ -38,11 +36,12 @@ def __init__(self, df=None, loadFromFile=None, resultIndex=1, **kwargs):
self.loader = "from_pandas" # TODO: need to handle this on restore state
self.from_pandas(df)

# TODO not sure why loadFromFile is not used..
elif self.filename is not None: # filename has been set
self.from_file(self.filename)
# link measData to the requested value

def linkMeasData(self, measDataLink=None):
def linkMeasData(self, measDataLink:Optional[str]=None)-> None:
if measDataLink is None:
measDataLink = self.measDataLink
assert measDataLink in [
Expand All @@ -60,35 +59,15 @@ def linkMeasData(self, measDataLink=None):
ISigma=measDataObj["ISigma"],
)

def from_pandas(self, df=None):
def from_pandas(self, df:pandas.DataFrame=None)->None:
assert False, "2D data from_pandas not implemented yet"
pass

# """uses a dataframe as input, should contain 'Q', 'I', and 'ISigma'"""
# assert isinstance(
# df, pandas.DataFrame
# ), "from_pandas requires a pandas DataFrame with 'Q', 'I', and 'ISigma'"
# # maybe add a check for the keys:
# assert all(
# [key in df.keys() for key in ["Q", "I", "ISigma"]]
# ), "from_pandas requires the dataframe to contain 'Q', 'I', and 'ISigma'"
# assert all(
# [df[key].dtype.kind in 'f' for key in ["Q", "I", "ISigma"]]
# ), "data could not be read correctly. If csv, did you supply the right csvargs?"
# self.rawData = df
# self.prepare()

def from_csv(self, filename, csvargs={}):
def from_csv(self, filename:Path, csvargs:dict={})->None:
assert False, "2D data from_csv not implemented yet"
pass

# """reads from a three-column csv file, takes pandas from_csv arguments"""
# assert filename is not None, "from_csv requires an input filename of a csv file"
# localCsvargs = self.csvargs.copy()
# localCsvargs.update(csvargs)
# self.from_pandas(pandas.read_csv(filename, **localCsvargs))

def clip(self):
def clip(self) -> None:

# copied from a jupyter notebook:
# test with directly imported data
Expand Down Expand Up @@ -162,13 +141,13 @@ def clip(self):
(self.clippedData["Q"][1]).max(),
]

def omit(self):
def omit(self)-> None:
# this can skip/omit unwanted ranges of data (for example a data range with an unwanted XRD peak in it)
# requires an "omitQRanges" list of [[qmin, qmax]]-data ranges to omit
logging.warning("Omitting ranges not implemented yet for 2D")
pass

def reconstruct2D(self, modelI1D):
def reconstruct2D(self, modelI1D: np.ndarray) -> np.ndarray:
"""
Reconstructs a masked 2D data array from the (1D) model intensity, skipping the masked and clipped pixels (left as NaN)
This function can be used to plot the resulting model intensity and comparing it with self.clippedData["I2D"]
Expand All @@ -178,7 +157,7 @@ def reconstruct2D(self, modelI1D):
RMI[np.where(self.clippedData["invMask"])] = modelI1D
return RMI

def reBin(self, nbins=None, IEMin=0.01, QEMin=0.01):
def reBin(self, nbins:Optional[int]=None, IEMin:float=0.01, QEMin:float=0.01)->None:
print("2D data rebinning not implemented, binnedData = clippedData for now")
self.binnedData = self.clippedData

8 changes: 4 additions & 4 deletions mcsas3/McHDF.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,10 @@ class McHDF(object):
resultIndex = 1 # per default number 1, but can be changed.
nxsEntryPoint = f"/analyses/MCResult{resultIndex}/" # changed to full path to result, not sure if this can work like this

def __init__(self):
def __init__(self)->None:
    # nothing to set up here: resultIndex and nxsEntryPoint are class attributes
    pass

def _HDFSetResultIndex(self, resultIndex=None):
def _HDFSetResultIndex(self, resultIndex:int)->None:
# resultIndex = -1 should go to the last existing one
#
assert (
Expand All @@ -26,7 +26,7 @@ def _HDFSetResultIndex(self, resultIndex=None):
self.resultIndex = resultIndex
self.nxsEntryPoint = f"/analyses/MCResult{self.resultIndex}/"

def _HDFloadKV(self, filename=None, path=None, datatype=None, default=None):
def _HDFloadKV(self, filename:Path, path: str, datatype=None, default=None): # outputs any hdf5 value type
with h5py.File(filename, "r") as h5f:
if path not in h5f:
return default
Expand Down Expand Up @@ -86,7 +86,7 @@ def _HDFloadKV(self, filename=None, path=None, datatype=None, default=None):

return value

def _HDFstoreKV(self, filename=None, path=None, key=None, value=None):
def _HDFstoreKV(self, filename:Path, path:str, key:str, value=None)->None:
assert filename is not None, "filename (output filename) cannot be empty"
assert path is not None, "HDF5 path cannot be empty"
assert key is not None, "key cannot be empty"
Expand Down
Loading

0 comments on commit c1b4b8a

Please sign in to comment.