Skip to content

Commit

Permalink
Update data-doctor dump so it dumps all blocks and transactions
Browse files Browse the repository at this point in the history
  • Loading branch information
JamesPiechota committed Jun 26, 2024
1 parent 4408325 commit 494fd58
Show file tree
Hide file tree
Showing 3 changed files with 101 additions and 26 deletions.
99 changes: 75 additions & 24 deletions apps/arweave/src/ar_doctor_dump.erl
Original file line number Diff line number Diff line change
Expand Up @@ -4,40 +4,91 @@

-include_lib("kernel/include/file.hrl").
-include_lib("arweave/include/ar.hrl").
-include_lib("arweave/include/ar_config.hrl").

%% @doc data-doctor entry point for the `dump' subcommand: forwards the raw
%% CLI argument list (a list of strings) straight to dump/1.
main(Args) ->
	dump(Args).

%% @doc Print usage information for `data-doctor dump' to the console.
%% Note: the stale pre-update usage line ("dump <data_dir> <output_dir>")
%% has been removed; dump/1 now requires four arguments.
help() ->
	ar:console("data-doctor dump <min_height> <max_height> <data_dir> <output_dir>~n"),
	ar:console(" min_height: The minimum height of the blocks to dump.~n"),
	ar:console(" max_height: The maximum height of the blocks to dump.~n"),
	ar:console(" data_dir: Full path to your data_dir.~n"),
	ar:console(" output_dir: Full path to a directory where the dumped data will be written.~n"),
	ar:console("~nExample:~n"),
	ar:console("data-doctor dump /mnt/arweave-data /mnt/output~n").

%% @doc Dump all blocks (and their transactions) in the height range
%% [MinHeight, MaxHeight] from the node's key-value store to OutputDir as JSON.
%%
%% Args is the CLI argument list: [MinHeight, MaxHeight, DataDir, OutputDir],
%% all strings. Returns false (usage error) when fewer than four arguments are
%% given, true otherwise.
%%
%% NOTE(review): the merged pre-update body that matched
%% `[DataDir, OutputDir] = Args' and scanned the disk cache has been removed —
%% it could never match once the guard required four arguments.
dump(Args) when length(Args) < 4 ->
	false;
dump(Args) ->
	[MinHeight, MaxHeight, DataDir, OutputDir] = Args,

	%% Pre-create the two output subdirectories; "dummy" is only there so
	%% ensure_dir/1 treats the final path component as a file inside them.
	ok = filelib:ensure_dir(filename:join([OutputDir, "blocks", "dummy"])),
	ok = filelib:ensure_dir(filename:join([OutputDir, "txs", "dummy"])),

	%% Point the arweave application at the node's data_dir, then start just
	%% the supervisors needed to read the block/tx key-value stores.
	Config = #config{data_dir = DataDir},
	application:set_env(arweave, config, Config),
	ar_kv_sup:start_link(),
	ar_storage_sup:start_link(),

	%% <<>> starts the key-value cursor before the first block hash.
	dump_blocks(<<>>, list_to_integer(MinHeight), list_to_integer(MaxHeight), OutputDir),
	true.

%% @doc Iterate the block_db key-value store from Cursor onward, writing every
%% block whose height falls in [MinHeight, MaxHeight] to
%% OutputDir/blocks/<encoded_hash>.json, along with its transactions
%% (see dump_txs/2). Recurses until the store is exhausted.
dump_blocks(Cursor, MinHeight, MaxHeight, OutputDir) ->
	case ar_kv:get_next(block_db, Cursor) of
		{ok, BH, Bin} ->
			%% BH is the raw block hash key; encode it for logging.
			H = ar_util:encode(BH),
			%% Any deserialization or I/O error for this entry is logged and
			%% skipped so one bad record does not abort the whole dump.
			try
				case ar_serialize:binary_to_block(Bin) of
					{ok, B} ->
						case B#block.height >= MinHeight andalso B#block.height =< MaxHeight of
							true ->
								%% No ~n here on purpose: " ... skipping~n" or
								%% " ... writing~n" below completes the line.
								io:format("Block: ~p / ~p", [B#block.height, H]),
								JsonFilename = io_lib:format("~s.json", [ar_util:encode(B#block.indep_hash)]),
								OutputFilePath = filename:join([OutputDir, "blocks", JsonFilename]),
								case file:read_file_info(OutputFilePath) of
									{ok, _FileInfo} ->
										io:format(" ... skipping~n"),
										ok; % Already dumped on a previous run; do nothing.
									{error, enoent} ->
										io:format(" ... writing~n"),
										%% Dump the block's transactions first, then the block itself.
										dump_txs(B#block.txs, OutputDir),
										Json = ar_serialize:block_to_json_struct(B),
										JsonString = ar_serialize:jsonify(Json),
										file:write_file(OutputFilePath, JsonString)
								end;
							false ->
								%% Out of the requested height range; skip silently.
								ok
						end;
					_ ->
						%% Undeserializable block record; skip silently.
						ok
				end
			catch
				Type:Reason ->
					io:format("Error processing cursor ~p: ~p:~p~n", [Cursor, Type, Reason])
			end,

			%% Advance the cursor past BH: keys are 48-byte (384-bit) hashes,
			%% so incrementing the key as a big integer yields the smallest
			%% binary strictly greater than BH.
			<< Start:384 >> = BH,
			NextCursor = << (Start + 1):384 >>,
			dump_blocks(NextCursor, MinHeight, MaxHeight, OutputDir); % Recursive call with the new cursor
		none ->
			io:format("No more entries.~n")
	end.

%% @doc Write each transaction in the given TXID list to
%% OutputDir/txs/<encoded_txid>.json. Transactions not present in tx_db are
%% skipped silently (best effort). Returns ok.
%%
%% Fix: the empty-list clause previously bound OutputDir without using it,
%% producing an unused-variable compiler warning; it is now `_OutputDir'.
dump_txs([], _OutputDir) ->
	ok;
dump_txs([TXID | TXIDs], OutputDir) ->
	case ar_kv:get(tx_db, TXID) of
		{ok, Bin} ->
			{ok, TX} = ar_serialize:binary_to_tx(Bin),
			Json = ar_serialize:tx_to_json_struct(TX),
			JsonString = ar_serialize:jsonify(Json),
			JsonFilename = io_lib:format("~s.json", [ar_util:encode(TXID)]),
			OutputFilePath = filename:join([OutputDir, "txs", JsonFilename]),
			file:write_file(OutputFilePath, JsonString);
		_ ->
			%% Missing or unreadable tx: skip, keep dumping the rest.
			ok
	end,
	dump_txs(TXIDs, OutputDir).
25 changes: 25 additions & 0 deletions apps/arweave/src/ar_storage_sup.erl
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
-module(ar_storage_sup).

-behaviour(supervisor).

%% Public API.
-export([start_link/0]).

%% supervisor callbacks.
-export([init/1]).

-include_lib("arweave/include/ar_sup.hrl").
-include_lib("arweave/include/ar_config.hrl").

%%%===================================================================
%%% Public interface.
%%%===================================================================

%% @doc Start the storage supervisor, registered locally as ?MODULE.
start_link() ->
	supervisor:start_link({local, ?MODULE}, ?MODULE, []).

%% ===================================================================
%% Supervisor callbacks.
%% ===================================================================

%% @doc Create the shared ar_storage ETS table and supervise the
%% ar_storage worker. The table is owned by the supervisor process so
%% it survives restarts of the worker.
init([]) ->
	ets:new(ar_storage, [set, public, named_table, {read_concurrency, true}]),
	SupFlags = {one_for_one, 5, 10},
	Children = [?CHILD(ar_storage, worker)],
	{ok, {SupFlags, Children}}.
3 changes: 1 addition & 2 deletions apps/arweave/src/ar_sup.erl
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ init([]) ->
ets:new(ar_peers, [set, public, named_table, {read_concurrency, true}]),
ets:new(ar_http, [set, public, named_table]),
ets:new(ar_blacklist_middleware, [set, public, named_table]),
ets:new(ar_storage, [set, public, named_table, {read_concurrency, true}]),
ets:new(ar_randomx_state_key_blocks, [set, public, named_table]),
ets:new(ar_randomx_state_key_heights, [ordered_set, public, named_table]),
ets:new(blacklist, [set, public, named_table]),
Expand Down Expand Up @@ -71,7 +70,7 @@ init([]) ->
?CHILD_SUP(ar_events_sup, supervisor),
?CHILD_SUP(ar_http_sup, supervisor),
?CHILD_SUP(ar_kv_sup, supervisor),
?CHILD(ar_storage, worker),
?CHILD_SUP(ar_storage_sup, supervisor),
?CHILD(ar_peers, worker),
?CHILD(ar_disk_cache, worker),
?CHILD(ar_watchdog, worker),
Expand Down

0 comments on commit 494fd58

Please sign in to comment.