Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

WIP:MySQL Directory Format #84

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
141 changes: 95 additions & 46 deletions borgmatic/hooks/mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,10 @@ def execute_dump_command(
mysql_dump_command = tuple(
shlex.quote(part) for part in shlex.split(database.get('mysql_dump_command') or 'mysqldump')
)

dump_format = database.get('format')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Style nit: This variable could easily inlined given that it's not used anywhere else besides the next line.

is_directory_format = dump_format == 'directory'

dump_command = (
mysql_dump_command
+ (tuple(database['options'].split(' ')) if 'options' in database else ())
Expand All @@ -96,7 +100,7 @@ def execute_dump_command(
+ (('--user', database['username']) if 'username' in database else ())
+ ('--databases',)
+ database_names
+ ('--result-file', dump_filename)
+ (('--tab', dump_filename) if is_directory_format else ('--result-file', dump_filename))
)

logger.debug(
Expand All @@ -105,13 +109,20 @@ def execute_dump_command(
if dry_run:
return None

dump.create_named_pipe_for_dump(dump_filename)

return execute_command(
dump_command,
extra_environment=extra_environment,
run_to_completion=False,
)
if is_directory_format:
dump.create_parent_directory_for_dump(dump_filename)
execute_command(
dump_command,
extra_environment=extra_environment,
)
return None
else:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You could lose the else: given that the other code path returns early. (Do not feel strongly.)

dump.create_named_pipe_for_dump(dump_filename)
return execute_command(
dump_command,
extra_environment=extra_environment,
run_to_completion=False,
)


def use_streaming(databases, config, log_prefix):
Expand Down Expand Up @@ -153,31 +164,31 @@ def dump_data_sources(databases, config, log_prefix, dry_run):
for dump_name in dump_database_names:
renamed_database = copy.copy(database)
renamed_database['name'] = dump_name
processes.append(
execute_dump_command(
renamed_database,
log_prefix,
dump_path,
(dump_name,),
extra_environment,
dry_run,
dry_run_label,
)
)
else:
processes.append(
execute_dump_command(
database,
result = execute_dump_command(
renamed_database,
log_prefix,
dump_path,
dump_database_names,
(dump_name,),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not quite following this changed and the earlier deletions. Don't you still need some of that logic if the database name is all?

extra_environment,
dry_run,
dry_run_label,
)
if result:
processes.append(result)
else:
result = execute_dump_command(
database,
log_prefix,
dump_path,
dump_database_names,
extra_environment,
dry_run,
dry_run_label,
)
if result:
processes.append(result)

return [process for process in processes if process]
return processes


def remove_data_source_dumps(databases, config, log_prefix, dry_run): # pragma: no cover
Expand Down Expand Up @@ -225,31 +236,69 @@ def restore_data_source_dump(
mysql_restore_command = tuple(
shlex.quote(part) for part in shlex.split(data_source.get('mysql_command') or 'mysql')
)
restore_command = (
mysql_restore_command
+ ('--batch',)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this still needed/beneficial? We certainly don't want interactive behavior from mysql when it's invoked from borgmatic.

+ (
tuple(data_source['restore_options'].split(' '))
if 'restore_options' in data_source
else ()

dump_format = data_source.get('format')
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Similar here; could be inlined.

is_directory_format = dump_format == 'directory'

if is_directory_format:
mysql_restore_command = tuple(
shlex.quote(part) for part in shlex.split(data_source.get('mysql_command') or 'mysql')
)
+ (('--host', hostname) if hostname else ())
+ (('--port', str(port)) if port else ())
+ (('--protocol', 'tcp') if hostname or port else ())
+ (('--user', username) if username else ())
)
restore_command = (
mysql_restore_command
+ ('--batch',)
+ (
tuple(data_source['restore_options'].split(' '))
if 'restore_options' in data_source
else ()
)
+ (('--host', hostname) if hostname else ())
+ (('--port', str(port)) if port else ())
+ (('--protocol', 'tcp') if hostname or port else ())
+ (('--user', username) if username else ())
)
else:
restore_command = (
mysql_restore_command
+ ('--batch',)
+ (
tuple(data_source['restore_options'].split(' '))
if 'restore_options' in data_source
else ()
)
+ (('--host', hostname) if hostname else ())
+ (('--port', str(port)) if port else ())
+ (('--protocol', 'tcp') if hostname or port else ())
+ (('--user', username) if username else ())
)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I'm missing something.. These two commands look identical to me. Maybe it's just still WIP.


extra_environment = {'MYSQL_PWD': password} if password else None

logger.debug(f"{log_prefix}: Restoring MySQL database {data_source['name']}{dry_run_label}")
if dry_run:
return

# Don't give Borg local path so as to error on warnings, as "borg extract" only gives a warning
# if the restore paths don't exist in the archive.
execute_command_with_processes(
restore_command,
[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
extra_environment=extra_environment,
)
if is_directory_format:
dump_path = make_dump_path(config)
dump_directory = dump.make_data_source_dump_filename(
dump_path, data_source['name'], data_source.get('hostname')
)
for file in os.listdir(dump_directory):
file_path = os.path.join(dump_directory, file)
if file.endswith('.sql'):
Copy link
Collaborator

@witten witten Aug 20, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The docs say these are .txt files...?

with open(file_path, 'r') as sql_file:
execute_command_with_processes(
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think you want the _with_processes() variant for this use case. That's intended for passing in, say, an extract_process so as to extract a single file into a restore command via background streaming. But that's not what you're doing here; this is presumably a synchronous call (... it's not streaming from borg extract). So, for that, maybe try just execute_command()?

restore_command,
[],
output_log_level=logging.DEBUG,
input_file=sql_file,
extra_environment=extra_environment,
)
else:
execute_command_with_processes(
restore_command,
[extract_process],
output_log_level=logging.DEBUG,
input_file=extract_process.stdout,
extra_environment=extra_environment,
)
42 changes: 42 additions & 0 deletions tests/unit/hooks/test_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,48 @@ def test_use_streaming_false_for_no_databases():
assert not module.use_streaming(databases=[], config=flexmock(), log_prefix=flexmock())


def test_use_streaming_true_for_any_non_directory_format_databases():
assert module.use_streaming(
databases=[{'format': 'stuff'}, {'format': 'directory'}, {}],
config=flexmock(),
log_prefix=flexmock(),
)


def test_use_streaming_true_for_all_directory_format_databases():
assert module.use_streaming(
databases=[{'format': 'directory'}, {'format': 'directory'}],
config=flexmock(),
log_prefix=flexmock(),
)


def test_dump_data_sources_runs_mysql_dump_with_directory_format():
databases = [{'name': 'foo', 'format': 'directory'}]
flexmock(module).should_receive('make_dump_path').and_return('')
flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
flexmock(module.dump).should_receive('make_data_source_dump_filename').and_return(
'databases/localhost/foo'
)
flexmock(module.os.path).should_receive('exists').and_return(False)
flexmock(module.dump).should_receive('create_parent_directory_for_dump')
flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()

flexmock(module).should_receive('execute_command').with_args(
(
'mysqldump',
'--add-drop-database',
'--databases',
'foo',
'--tab',
'databases/localhost/foo',
),
extra_environment=None,
).and_return(flexmock()).once()

assert module.dump_data_sources(databases, {}, 'test.yaml', dry_run=False) == []


def test_dump_data_sources_dumps_each_database():
databases = [{'name': 'foo'}, {'name': 'bar'}]
processes = [flexmock(), flexmock()]
Expand Down