Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Minor release 0.1.1 #9

Merged
merged 8 commits into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion emtools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,5 +24,5 @@
# *
# **************************************************************************

__version__ = '0.1.0'
__version__ = '0.1.1'

3 changes: 2 additions & 1 deletion emtools/jobs/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,5 +15,6 @@
# **************************************************************************

from .pipeline import Pipeline
from .batch_manager import BatchManager

__all__ = ["Pipeline"]
__all__ = ["Pipeline", "BatchManager"]
37 changes: 0 additions & 37 deletions emtools/jobs/__main__.py

This file was deleted.

87 changes: 87 additions & 0 deletions emtools/jobs/batch_manager.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# **************************************************************************
# *
# * Authors: J.M. de la Rosa Trevin ([email protected])
# *
# * This program is free software; you can redistribute it and/or modify
# * it under the terms of the GNU General Public License as published by
# * the Free Software Foundation; either version 3 of the License, or
# * (at your option) any later version.
# *
# * This program is distributed in the hope that it will be useful,
# * but WITHOUT ANY WARRANTY; without even the implied warranty of
# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# * GNU General Public License for more details.
# *
# **************************************************************************

import os
import shutil
from datetime import datetime
from uuid import uuid4

from emtools.utils import Process


class BatchManager:
    """ Class used to generate and handle the creation of batches
    from an input stream of items.

    This is used for streaming/parallel processing. Every batch gets its own
    folder under the working path; a filename is extracted from each item
    and symlinked into the batch folder.
    """
    def __init__(self, batchSize, inputItemsIterator, workingPath,
                 itemFileNameFunc=lambda item: item.getFileName()):
        """
        Args:
            batchSize: Number of items that will be grouped into one batch.
                A batch is emitted when exactly this many items have been
                accumulated, so a value lower than 1 results in a single
                batch holding all the items.
            inputItemsIterator: input items iterator
            workingPath: path where the batches folder will be created
            itemFileNameFunc: function to extract a filename from each item
                (by default: lambda item: item.getFileName())
        """
        self._items = inputItemsIterator
        self._batchSize = batchSize
        self._batchCount = 0
        self._workingPath = workingPath
        self._itemFileNameFunc = itemFileNameFunc

    def _createBatchId(self):
        """ Return a new batch id: <timestamp>_<batch number>_<uuid chunk>. """
        # We will use batchCount, before the batch is created
        nowPrefix = datetime.now().strftime('%y%m%d-%H%M%S')
        countStr = '%02d' % (self._batchCount + 1)
        uuidSuffix = str(uuid4()).split('-')[0]
        return f"{nowPrefix}_{countStr}_{uuidSuffix}"

    @staticmethod
    def _resetFolder(path):
        """ Remove whatever exists at path and create an empty folder there.

        Done with os/shutil calls instead of shell commands, so paths with
        quotes or other shell metacharacters are handled verbatim. Missing
        parent folders are created as well.
        """
        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        elif os.path.lexists(path):
            # Regular file or (possibly broken) symlink
            os.remove(path)
        os.makedirs(path)

    def _createBatch(self, items):
        """ Create the folder for a new batch and link the items' files there.

        Args:
            items: list of items that belong to the new batch

        Returns:
            A dict with the keys:
                'items': the same list of items received
                'id': the generated batch id
                'path': folder of the batch (workingPath joined with the id)
                'index': number of batches created so far, this one included

        Raises:
            OSError: if the folder or any of the links can not be created
                (e.g. FileExistsError when two items of the batch share the
                same base name).
        """
        batchId = self._createBatchId()
        batchPath = os.path.join(self._workingPath, batchId)
        print(f"Creating batch: {batchPath}")
        self._resetFolder(batchPath)

        for item in items:
            fn = self._itemFileNameFunc(item)
            # Links are absolute, so they do not depend on the current dir
            os.symlink(os.path.abspath(fn),
                       os.path.join(batchPath, os.path.basename(fn)))

        self._batchCount += 1
        return {
            'items': items,
            'id': batchId,
            'path': batchPath,
            'index': self._batchCount
        }

    def generate(self):
        """ Generate batches based on the input items.

        Items are consumed lazily from the input iterator. A batch is
        yielded each time batchSize items have been accumulated and, once
        the input is exhausted, a last batch is yielded with the remaining
        items (if any).

        Yields:
            The batch dict returned by _createBatch.
        """
        items = []

        for item in self._items:
            items.append(item)

            if len(items) == self._batchSize:
                yield self._createBatch(items)
                # New list: the yielded batch keeps its own items
                items = []

        if items:
            yield self._createBatch(items)
191 changes: 0 additions & 191 deletions emtools/jobs/motioncor.py

This file was deleted.

5 changes: 3 additions & 2 deletions emtools/metadata/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@
# **************************************************************************

from .table import Column, ColumnList, Table
from .starfile import StarFile
from .starfile import StarFile, StarMonitor
from .epu import EPU
from .misc import Bins, TsBins, DataFiles, MovieFiles
from .sqlite import SqliteFile

__all__ = ["Column", "ColumnList", "Table", "StarFile", "EPU",

__all__ = ["Column", "ColumnList", "Table", "StarFile", "StarMonitor", "EPU",
"Bins", "TsBins", "SqliteFile", "DataFiles", "MovieFiles"]
3 changes: 2 additions & 1 deletion emtools/metadata/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
# **************************************************************************

import os

from datetime import datetime, timedelta

from emtools.utils import Path, Pretty
from emtools.utils import Path, Pretty, Process


class Bins:
Expand Down
Loading
Loading