Skip to content

Commit

Permalink
Merge pull request #72 from MoritzBoehme/include_other_files
Browse files Browse the repository at this point in the history
feat: add .filetags syntax for including other files
  • Loading branch information
novoid authored Aug 26, 2024
2 parents da97bb0 + a60aca7 commit b84bfaf
Show file tree
Hide file tree
Showing 2 changed files with 150 additions and 46 deletions.
107 changes: 65 additions & 42 deletions filetags/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,9 @@ def safe_import(library):
DONOTSUGGEST_PREFIX = '#donotsuggest '
do_not_suggest_tags = [] # list of lower-case strings

INCLUDE_PREFIX = '#include '
included_files = []

DESCRIPTION = "This tool adds or removes simple tags to/from file names.\n\
\n\
Tags within file names are placed between the actual file name and\n\
Expand Down Expand Up @@ -1738,56 +1741,76 @@ def locate_and_parse_controlled_vocabulary(startfile):

global unique_tags
global do_not_suggest_tags
global included_files

if filename:
logging.debug('locate_and_parse_controlled_vocabulary: .filetags found: ' + filename)
if os.path.isfile(filename):
logging.debug('locate_and_parse_controlled_vocabulary: found controlled vocabulary')

tags = []
with codecs.open(filename, encoding='utf-8') as filehandle:
logging.debug('locate_and_parse_controlled_vocabulary: reading controlled vocabulary in [%s]' %
filename)
global controlled_vocabulary_filename
controlled_vocabulary_filename = filename
for rawline in filehandle:

if rawline.strip().lower().startswith(DONOTSUGGEST_PREFIX):
# parse and save do not suggest tags:
line = rawline[len(DONOTSUGGEST_PREFIX):].strip().lower()
return parse_controlled_vocabulary(filename)
else:
logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary')
return []

def parse_controlled_vocabulary(filename):
"""Parses a controlled vocabulary file."""
files_to_include = []

logging.debug('parse_controlled_vocabulary: .filetags found: ' + filename)
if os.path.isfile(filename):
logging.debug('parse_controlled_vocabulary: found controlled vocabulary')

included_files.append(os.path.realpath(filename))

tags = []
with codecs.open(filename, encoding='utf-8') as filehandle:
logging.debug('parse_controlled_vocabulary: reading controlled vocabulary in [%s]' %
filename)
global controlled_vocabulary_filename
controlled_vocabulary_filename = filename
for rawline in filehandle:
if rawline.strip().lower().startswith(INCLUDE_PREFIX):
file_to_include = rawline.strip().removeprefix(INCLUDE_PREFIX)
current_file_dir = os.path.dirname(filename)
file_path = os.path.realpath(os.path.join(current_file_dir, file_to_include))
logging.debug('parse_controlled_vocabulary: found include statement for file [%s]' % file_path)
if file_path not in included_files:
files_to_include.append(file_path)
logging.debug('parse_controlled_vocabulary: including file [%s]' % file_path)

elif rawline.strip().lower().startswith(DONOTSUGGEST_PREFIX):
# parse and save do not suggest tags:
line = rawline[len(DONOTSUGGEST_PREFIX):].strip().lower()
for tag in line.split(BETWEEN_TAG_SEPARATOR):
do_not_suggest_tags.append(tag)
else:

# remove everything after the first hash character (which is a comment separator)
line = rawline.strip().split('#')[0].strip() # split and take everything before the first '#' as new "line"

if len(line) == 0:
# nothing left, line consisted only of a comment or was empty
continue

if BETWEEN_TAG_SEPARATOR in line:
## if multiple tags are in one line, they are mutually exclusive: only one can be set via filetags
logging.debug('parse_controlled_vocabulary: found unique tags: %s' %
(line))
unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR))
for tag in line.split(BETWEEN_TAG_SEPARATOR):
do_not_suggest_tags.append(tag)
# *also* append unique tags to general tag list:
tags.append(tag)
else:
tags.append(line)

# remove everyting after the first hash character (which is a comment separator)
line = rawline.strip().split('#')[0].strip() # split and take everything before the first '#' as new "line"

if len(line) == 0:
# nothing left, line consisted only of a comment or was empty
continue

if BETWEEN_TAG_SEPARATOR in line:
## if multiple tags are in one line, they are mutually exclusive: only has can be set via filetags
logging.debug('locate_and_parse_controlled_vocabulary: found unique tags: %s' %
(line))
unique_tags.append(line.split(BETWEEN_TAG_SEPARATOR))
for tag in line.split(BETWEEN_TAG_SEPARATOR):
# *also* append unique tags to general tag list:
tags.append(tag)
else:
tags.append(line)
for file in files_to_include:
tags.extend(parse_controlled_vocabulary(file))

logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i tags' %
len(tags))
logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' %
(len(unique_tags) - 1))
logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i tags' %
len(tags))
logging.debug('parse_controlled_vocabulary: controlled vocabulary has %i groups of unique tags' %
(len(unique_tags) - 1))

return tags
else:
logging.debug('locate_and_parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
return []
return tags
else:
logging.debug('locate_and_parse_controlled_vocabulary: could not derive filename for controlled vocabulary')
logging.debug('parse_controlled_vocabulary: controlled vocabulary is a non-existing file')
return []


Expand Down
89 changes: 85 additions & 4 deletions tests/unit_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,9 +462,8 @@ def test_comment_line_in_cv(self):
self.assertEqual(set(cv), set(["foo", "bar", "baz", "tag"]))


def test_include_lines_in_cv(self):
def test_include_lines_in_cv_not_circular(self):
"""
FIXXME!!!!
This test does not use the setup from the test class. However, it does use several
other util functions defined in this class. Therefore, I set up a different test
case here and re-use the util functions.
Expand All @@ -473,13 +472,95 @@ def test_include_lines_in_cv(self):
tmpdir
`- subdir1
|
`- .filetags with a reference to subdir2/included_filetags
`- .filetags with a reference to subdir2/included.filetags
- subdir2
|
`- included.filetags with additional tags
"""
pass # FIXXME: implement
tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
print("\ntempdir: " + tempdir + ' <<<' + '#' * 10)

subdir1 = os.path.join(tempdir, "subdir1")
os.makedirs(subdir1)
assert(os.path.exists(subdir1))

subdir2 = os.path.join(tempdir, "subdir2")
os.makedirs(subdir2)
assert(os.path.exists(subdir2))

include_cv = """
tag_from_include_before_CV
#include ../subdir2/included.filetags
tag_from_include_after_CV
"""
include_file = os.path.join(subdir1, '.filetags')
self.create_file(include_file, include_cv)
assert(os.path.isfile(include_file))

included_cv = 'tag_from_included_CV'
included_file = os.path.join(subdir2, 'included.filetags')
self.create_file(included_file, included_cv)
assert(os.path.isfile(included_file))

if platform.system() != 'Windows':
os.sync()

# setup complete

cv = filetags.locate_and_parse_controlled_vocabulary(include_file)
self.assertEqual(set(cv), set(["tag_from_include_before_CV", "tag_from_include_after_CV", "tag_from_included_CV"]))

def test_include_lines_in_cv_circular(self):
    """
    Parse two controlled vocabulary files that include each other.

    The call is expected to return and to yield the tags of both files.

    This test does not use the setup from the test class. However, it does use several
    other util functions defined in this class. Therefore, a different test case is
    set up here and the util functions are re-used.

    Setup looks like this:

    tmpdir
     `- subdir1
        |
        `- .filetags with a reference to subdir2/included.filetags
     `- subdir2
        |
        `- included.filetags with additional tags and reference to subdir1/.filetags
    """
    # Local import: only needed for the cleanup registered below.
    import shutil

    tempdir = tempfile.mkdtemp(prefix="TestControlledVocabulary_Include")
    # Remove the temporary tree when the test ends, even if an assertion fails;
    # mkdtemp() never deletes the directory on its own.
    self.addCleanup(shutil.rmtree, tempdir, ignore_errors=True)
    print("\ntempdir: " + tempdir + ' <<<' + '#' * 10)

    # self.assertTrue instead of bare assert: bare asserts are stripped under "python -O".
    subdir1 = os.path.join(tempdir, "subdir1")
    os.makedirs(subdir1)
    self.assertTrue(os.path.exists(subdir1))

    subdir2 = os.path.join(tempdir, "subdir2")
    os.makedirs(subdir2)
    self.assertTrue(os.path.exists(subdir2))

    # First file: includes the second one between two tags of its own.
    circular1_cv = """
tag_from_first_before_CV
#include ../subdir2/included.filetags
tag_from_first_after_CV
"""
    circular1_file = os.path.join(subdir1, '.filetags')
    self.create_file(circular1_file, circular1_cv)
    self.assertTrue(os.path.isfile(circular1_file))

    # Second file: includes the first one again, which closes the cycle.
    circular2_cv = """
tag_from_second_before_CV
#include ../subdir1/.filetags
tag_from_second_after_CV
"""
    circular2_file = os.path.join(subdir2, 'included.filetags')
    self.create_file(circular2_file, circular2_cv)
    self.assertTrue(os.path.isfile(circular2_file))

    # os.sync() is not available on Windows.
    if platform.system() != 'Windows':
        os.sync()

    # setup complete

    cv = filetags.locate_and_parse_controlled_vocabulary(circular1_file)
    self.assertEqual(set(cv), set(["tag_from_first_before_CV", "tag_from_first_after_CV", "tag_from_second_before_CV", "tag_from_second_after_CV"]))

class TestFileWithoutTags(unittest.TestCase):

Expand Down

0 comments on commit b84bfaf

Please sign in to comment.