Skip to content
This repository has been archived by the owner on May 31, 2022. It is now read-only.

Commit

Permalink
Changes
Browse files: browse the repository at this point in the history
  • Loading branch information
pcstout committed May 14, 2019
1 parent 33ae636 commit c3f383f
Showing 1 changed file with 21 additions and 17 deletions.
38 changes: 21 additions & 17 deletions ghap_migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,11 +239,13 @@ def check_names(self, local_path):
dirs, files = self.get_dirs_and_files(local_path)

for file_entry in files:
self.has_invalid_synapse_filename_chars(file_entry.path)
if self.get_invalid_synapse_filename_chars(file_entry.path):
continue

self.sanitize_entity_name('File', file_entry.path)

for dir_entry in dirs:
self.has_invalid_synapse_entity_chars('Folder', dir_entry.path)
self.get_invalid_synapse_entity_chars('Folder', dir_entry.path)
self.check_names(dir_entry.path)

def push_to_synapse(self, git_url, repo_name, repo_path, git_folder, synapse_project_id, synapse_path):
Expand Down Expand Up @@ -418,7 +420,7 @@ def find_or_create_folder(self, path, synapse_parent):

folder_name = os.path.basename(path)

if self.has_invalid_synapse_entity_chars('Folder', path):
if self.get_invalid_synapse_entity_chars('Folder', path):
return synapse_folder

full_synapse_path = self.get_synapse_path(folder_name, synapse_parent)
Expand Down Expand Up @@ -470,7 +472,7 @@ def find_or_upload_file(self, local_file, synapse_parent):
self.add_processed_path(local_file)
return synapse_file

if self.has_invalid_synapse_filename_chars(local_file):
if self.get_invalid_synapse_filename_chars(local_file):
return synapse_file

sanitized_name = self.sanitize_entity_name('File', local_file)
Expand Down Expand Up @@ -560,7 +562,7 @@ def get_synapse_path(self, folder_or_filename, parent):
# NOTE: plus signs (+) should be included here but there is a bug in Synapse that prevents them.
VALID_FILENAME_CHARS = frozenset("-_.()&$, %s%s" % (string.ascii_letters, string.digits))

def has_invalid_synapse_filename_chars(self, local_path):
def get_invalid_synapse_filename_chars(self, local_path):
"""
Returns any invalid characters (for Synapse filenames) from a string.
"""
Expand All @@ -581,33 +583,35 @@ def has_invalid_synapse_filename_chars(self, local_path):
'"': ''
}

def has_invalid_synapse_entity_chars(self, entity_type_label, local_path, as_error=True):
def get_invalid_synapse_entity_chars(self, entity_type_label, local_path, log_it=True):
"""
Returns any invalid characters (for Synapse entity) from a string.
"""
name = os.path.basename(local_path)
bad_chars = [c for c in name if c not in self.VALID_ENTITY_NAME_CHARS]

if bad_chars:
err = '{0} Entity Name: "{1}" contains invalid characters: {2}'.format(entity_type_label, local_path,
''.join(bad_chars))

if as_error:
self.log_error(err)
else:
logging.info(err)
if bad_chars and log_it:
self.log_error(
'{0} Entity Name: "{1}" contains invalid characters: {2}'.format(entity_type_label, local_path,
''.join(bad_chars)))

return bad_chars

def sanitize_entity_name(self, entity_type_label, local_path):
name = os.path.basename(local_path)

cleaned_filename = unicodedata.normalize('NFKD', u'{0}'.format(name)).encode('ASCII', 'ignore').decode()

sanitized_name = ''.join(
c if c in self.VALID_ENTITY_NAME_CHARS else self.ENTITY_NAME_CHAR_MAP.get(c, '_{0}_'.format(ord(c))) for c
in name)
in cleaned_filename)

if sanitized_name != name:
self.has_invalid_synapse_entity_chars(entity_type_label, name, as_error=False)
logging.info(' Sanitizing {0} Entity Name: {1} -> {2}'.format(entity_type_label, name, sanitized_name))
bad_chars = self.get_invalid_synapse_entity_chars(entity_type_label, name, log_it=False)
logging.info(
'Sanitizing {0} Entity Name: {1} -> {2} : invalid characters: {3}'.format(entity_type_label, name,
sanitized_name,
''.join(bad_chars)))

return sanitized_name

Expand Down

0 comments on commit c3f383f

Please sign in to comment.