Skip to content
This repository has been archived by the owner on May 31, 2022. It is now read-only.

Commit

Permalink
Changes
Browse files Browse the repository at this point in the history
  • Loading branch information
pcstout committed May 14, 2019
1 parent c8bf2b3 commit 33ae636
Showing 1 changed file with 37 additions and 49 deletions.
86 changes: 37 additions & 49 deletions ghap_migrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,31 +239,11 @@ def check_names(self, local_path):
dirs, files = self.get_dirs_and_files(local_path)

for file_entry in files:
filename = os.path.basename(file_entry.path)

bad_filename_chars = self.get_invalid_synapse_filename_chars(filename)
if bad_filename_chars:
self.log_error('File Name: "{0}" contains invalid characters: "{1}"'.format(file_entry.path, ''.join(
bad_filename_chars)))

sanitized_name = self.sanitize_entity_name(filename)
if sanitized_name != filename:
logging.info('Sanitizing File Entity Name: {0} -> {1}'.format(filename, sanitized_name))

bad_entity_name_chars = self.get_invalid_synapse_entity_chars(sanitized_name)
if bad_entity_name_chars:
self.log_error('File Entity Name: "{0}" contains invalid characters: "{1}"'.format(file_entry.path,
''.join(
bad_entity_name_chars)))
self.has_invalid_synapse_filename_chars(file_entry.path)
self.sanitize_entity_name('File', file_entry.path)

for dir_entry in dirs:
folder_name = os.path.basename(dir_entry.path)

bad_entity_name_chars = self.get_invalid_synapse_entity_chars(folder_name)
if bad_entity_name_chars:
self.log_error('Folder Entity Name: "{0}" contains invalid characters: "{1}"'.format(dir_entry.path,
''.join(
bad_entity_name_chars)))
self.has_invalid_synapse_entity_chars('Folder', dir_entry.path)
self.check_names(dir_entry.path)

def push_to_synapse(self, git_url, repo_name, repo_path, git_folder, synapse_project_id, synapse_path):
Expand Down Expand Up @@ -438,11 +418,7 @@ def find_or_create_folder(self, path, synapse_parent):

folder_name = os.path.basename(path)

bad_entity_name_chars = self.get_invalid_synapse_entity_chars(folder_name)
if bad_entity_name_chars:
self.log_error(
'Folder Entity Name: "{0}" contains invalid characters: "{1}"'.format(path,
''.join(bad_entity_name_chars)))
if self.has_invalid_synapse_entity_chars('Folder', path):
return synapse_folder

full_synapse_path = self.get_synapse_path(folder_name, synapse_parent)
Expand Down Expand Up @@ -494,23 +470,10 @@ def find_or_upload_file(self, local_file, synapse_parent):
self.add_processed_path(local_file)
return synapse_file

filename = os.path.basename(local_file)

bad_filename_chars = self.get_invalid_synapse_filename_chars(filename)
if bad_filename_chars:
self.log_error('File Name: "{0}" contains invalid characters: "{1}"'.format(local_file, ''.join(
bad_filename_chars)))
if self.has_invalid_synapse_filename_chars(local_file):
return synapse_file

sanitized_name = self.sanitize_entity_name(filename)
if sanitized_name != filename:
logging.info('Sanitizing File Entity Name: {0} -> {1}'.format(filename, sanitized_name))

bad_entity_name_chars = self.get_invalid_synapse_entity_chars(sanitized_name)
if bad_entity_name_chars:
self.log_error('File Entity Name: "{0}" contains invalid characters: "{1}"'.format(local_file, ''.join(
bad_entity_name_chars)))
return synapse_file
sanitized_name = self.sanitize_entity_name('File', local_file)

full_synapse_path = self.get_synapse_path(sanitized_name, synapse_parent)
self.add_full_synapse_path(full_synapse_path, local_file)
Expand Down Expand Up @@ -597,32 +560,57 @@ def get_synapse_path(self, folder_or_filename, parent):
# Characters accepted in a Synapse file name.
# NOTE: plus signs (+) should be included here but there is a bug in Synapse that prevents them.
VALID_FILENAME_CHARS = frozenset("-_.()&$, " + string.ascii_letters + string.digits)

def has_invalid_synapse_filename_chars(self, local_path):
    """
    Check the file name of a local path for characters not allowed in Synapse filenames.

    Only the final path component (os.path.basename) is compared against
    VALID_FILENAME_CHARS. When disallowed characters are found, an error
    quoting the full local_path is recorded through self.log_error.

    :param local_path: Path to the local file; parent directories are not checked.
    :return: List of the invalid characters in order of appearance (duplicates
             are kept). The list is empty, and therefore falsy, for a valid name.
    """
    filename = os.path.basename(local_path)
    bad_chars = [c for c in filename if c not in self.VALID_FILENAME_CHARS]

    if bad_chars:
        self.log_error('File Name: "{0}" contains invalid characters: {1}'.format(local_path, ''.join(bad_chars)))

    return bad_chars

# Characters accepted in a Synapse entity name.
VALID_ENTITY_NAME_CHARS = frozenset("-_.+(), " + string.ascii_letters + string.digits)

# Replacement characters for entity names.
# sanitize_entity_name looks up each character that is not in
# VALID_ENTITY_NAME_CHARS here; characters without an entry are replaced
# with '_<ordinal>_' instead. An empty replacement drops the character.
ENTITY_NAME_CHAR_MAP = {
    '&': 'and',
    '\'': '',
    '"': '',
}

def has_invalid_synapse_entity_chars(self, entity_type_label, local_path, as_error=True):
    """
    Check the last component of a path for characters not allowed in Synapse entity names.

    Only os.path.basename(local_path) is compared against
    VALID_ENTITY_NAME_CHARS. When disallowed characters are found, a message
    quoting the full local_path is emitted.

    :param entity_type_label: Label placed at the start of the message (callers pass 'File' or 'Folder').
    :param local_path: Path whose final component is checked.
    :param as_error: When True (the default) the message goes to self.log_error;
                     when False it is only written with logging.info.
    :return: List of the invalid characters in order of appearance (duplicates
             are kept). The list is empty, and therefore falsy, for a valid name.
    """
    name = os.path.basename(local_path)
    bad_chars = [c for c in name if c not in self.VALID_ENTITY_NAME_CHARS]

    if bad_chars:
        err = '{0} Entity Name: "{1}" contains invalid characters: {2}'.format(entity_type_label, local_path,
                                                                                ''.join(bad_chars))

        if as_error:
            self.log_error(err)
        else:
            logging.info(err)

    return bad_chars

def sanitize_entity_name(self, entity_type_label, local_path):
    """
    Build a Synapse-safe entity name from the last component of a local path.

    Characters in VALID_ENTITY_NAME_CHARS are kept. Any other character is
    replaced with its ENTITY_NAME_CHAR_MAP entry or, when it has none, with
    '_<ordinal>_' (for example '#' becomes '_35_').

    When the name changes, the offending characters and the rename are written
    with logging.info; nothing is reported through self.log_error.

    :param entity_type_label: Label used in the log messages (callers pass 'File').
    :param local_path: Path whose final component is sanitized.
    :return: The sanitized name; identical to the original name when it was already valid.
    """
    name = os.path.basename(local_path)
    sanitized_name = ''.join(
        c if c in self.VALID_ENTITY_NAME_CHARS else self.ENTITY_NAME_CHAR_MAP.get(c, '_{0}_'.format(ord(c)))
        for c in name)

    if sanitized_name != name:
        # Informational only: the invalid characters are being replaced, so this is not an error.
        self.has_invalid_synapse_entity_chars(entity_type_label, name, as_error=False)
        logging.info(' Sanitizing {0} Entity Name: {1} -> {2}'.format(entity_type_label, name, sanitized_name))

    return sanitized_name


class LogFilter(logging.Filter):
FILTERS = [
Expand Down

0 comments on commit 33ae636

Please sign in to comment.