-
Notifications
You must be signed in to change notification settings - Fork 4
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Julien Sagot
committed
Sep 18, 2019
1 parent
9de5318
commit 1f932d8
Showing
6 changed files
with
208 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,22 @@ | ||
# geneweb-compact | ||
|
||
A tool to remove unused values from your database. | ||
|
||
It only support old legacy gwbd format. | ||
|
||
## Why? | ||
|
||
When modifiying a text, deleting a person or a family, GeneWeb does | ||
not actually delete data, it just remove the links to this data, | ||
instead. | ||
|
||
It can be a security problem, if you though you erased sensitive data | ||
from a note which is still present in the database. | ||
|
||
Also, as the time goes by, leftover data can grow waste some space en | ||
resources. | ||
|
||
## What? | ||
|
||
Current stage of the tool only report and dump leftover data. Next | ||
step is to actually perfom a database cleanup. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,177 @@ | ||
open Geneweb | ||
open Def | ||
open Dbdisk | ||
|
||
let fast = ref true | ||
let step_strings = ref false | ||
let step_persons = ref false | ||
let step_families = ref false | ||
|
||
let load_array a = if !fast then a.load_array () | ||
let clear_array a = a.clear_array () | ||
|
||
let split_sname base i = Mutil.split_sname @@ base.data.strings.get i | ||
let split_fname base i = Mutil.split_fname @@ base.data.strings.get i | ||
|
||
let scan base string person family = | ||
let mark t i = try t.(i) <- true with _ -> failwith @@ string_of_int i in | ||
if !step_strings then begin | ||
load_array base.data.strings ; | ||
let rev = Hashtbl.create base.data.strings.len in | ||
for i = 0 to base.data.strings.len - 1 do | ||
Hashtbl.add rev (base.data.strings.get i) i | ||
done ; | ||
let opt_mark t s = match Hashtbl.find_opt rev s with Some x -> mark t x | None -> () in | ||
let t = Array.make base.data.strings.len false in | ||
load_array base.data.persons ; | ||
for i = 0 to base.data.persons.len - 1 do | ||
let p = base.data.persons.get i in | ||
mark t p.first_name ; | ||
List.iter (opt_mark t) (split_fname base p.first_name) ; | ||
mark t p.surname ; | ||
List.iter (opt_mark t) (split_sname base p.surname) ; | ||
opt_mark t @@ Name.concat (base.data.strings.get p.first_name) (base.data.strings.get p.surname) ; | ||
mark t p.image ; | ||
mark t p.public_name ; | ||
List.iter (mark t) p.qualifiers ; | ||
List.iter (mark t) p.aliases ; | ||
List.iter (mark t) p.first_names_aliases ; | ||
List.iter (mark t) p.surnames_aliases ; | ||
List.iter begin fun {t_name;t_ident;t_place; _ } -> | ||
mark t t_ident ; | ||
mark t t_place ; | ||
(match t_name with Tname i -> mark t i | _ -> ()) ; | ||
end p.titles ; | ||
mark t p.occupation ; | ||
mark t p.birth_place ; | ||
mark t p.birth_note ; | ||
mark t p.birth_src ; | ||
mark t p.baptism_place ; | ||
mark t p.baptism_note ; | ||
mark t p.baptism_src ; | ||
mark t p.death_place ; | ||
mark t p.death_note ; | ||
mark t p.death_src ; | ||
mark t p.burial_place ; | ||
mark t p.burial_note ; | ||
mark t p.burial_src ; | ||
mark t p.notes ; | ||
mark t p.psources ; | ||
List.iter (fun {r_sources;_} -> mark t r_sources) p.rparents ; | ||
List.iter begin fun {epers_name;epers_place;epers_reason;epers_note;epers_src;_} -> | ||
(match epers_name with Epers_Name i -> mark t i | _ -> ()) ; | ||
mark t epers_place ; | ||
mark t epers_reason ; | ||
mark t epers_note ; | ||
mark t epers_src ; | ||
end p.pevents | ||
done ; | ||
clear_array base.data.persons ; | ||
load_array base.data.families ; | ||
for i = 0 to base.data.families.len - 1 do | ||
let f = base.data.families.get i in | ||
mark t f.marriage_place ; | ||
mark t f.marriage_note ; | ||
mark t f.marriage_src ; | ||
mark t f.comment ; | ||
mark t f.origin_file ; | ||
mark t f.fsources ; | ||
List.iter begin fun {efam_name;efam_place;efam_reason;efam_note;efam_src;_} -> | ||
(match efam_name with Efam_Name i -> mark t i | _ -> ()) ; | ||
mark t efam_place ; | ||
mark t efam_reason ; | ||
mark t efam_note ; | ||
mark t efam_src ; | ||
end f.fevents | ||
done ; | ||
clear_array base.data.families ; | ||
clear_array base.data.strings ; | ||
string base t | ||
end ; | ||
if !step_persons then begin | ||
load_array base.data.persons ; | ||
let t = Array.make base.data.persons.len false in | ||
for i = 0 to base.data.persons.len - 1 do | ||
if (base.data.persons.get i).key_index <> Gwdb1.dummy_iper then mark t i | ||
done ; | ||
clear_array base.data.persons ; | ||
person base t | ||
end ; | ||
if !step_families then begin | ||
load_array base.data.families ; | ||
let t = Array.make base.data.families.len false in | ||
for i = 0 to base.data.families.len - 1 do | ||
if (base.data.families.get i).fam_index <> Gwdb1.dummy_ifam then mark t i | ||
done ; | ||
clear_array base.data.families ; | ||
family base t | ||
end | ||
|
||
let report base = | ||
let aux fn _base t = | ||
let cnt = ref 0 in | ||
for i = 0 to Array.length t - 1 do if not t.(i) then incr cnt done ; | ||
fn !cnt | ||
in | ||
scan | ||
base | ||
(aux @@ fun c -> Printf.printf "Number of unused strings: %d\n" c) | ||
(aux @@ fun c -> Printf.printf "Number of ghost persons: %d\n" c) | ||
(aux @@ fun c -> Printf.printf "Number of ghost families: %d\n" c) | ||
|
||
let dump base = | ||
let dump_istr base t = | ||
for i = 0 to Array.length t - 1 do | ||
if not t.(i) then begin | ||
Printf.printf "=== [START ISTR %d] ===\n%s\n=== [END ISTR %d] ===\n\n" i (base.data.strings.get i) i | ||
end | ||
done | ||
in | ||
let dump_iper _base t = | ||
for i = 0 to Array.length t - 1 do | ||
if not t.(i) then begin Printf.printf "Ghost person: %d\n" i end | ||
done | ||
in | ||
let dump_ifam _base t = | ||
for i = 0 to Array.length t - 1 do | ||
if not t.(i) then begin Printf.printf "Ghost family: %d\n" i end | ||
done | ||
in | ||
scan base dump_istr dump_iper dump_ifam | ||
|
||
let steps = ref "strings,persons,families" | ||
let bname = ref "" | ||
let action = ref None | ||
let usage = "Usage: " ^ Sys.argv.(0) ^ " [OPTION] ACTIOn base" | ||
|
||
let speclist = | ||
[ ( "-mem" | ||
, Arg.Clear fast | ||
, " slower, but use less memory" ) | ||
; ( "-step" | ||
, Arg.Set_string steps | ||
, " STEPS steps to perform. Default is " ^ !steps ) | ||
; ( "-report" | ||
, Arg.Unit (fun () -> action := Some `report) | ||
, " only report number of unused values" ) | ||
; ( "-dump" | ||
, Arg.Unit (fun () -> action := Some `dump) | ||
, " dump unuse values" ) | ||
] | ||
|
||
let _ = | ||
Arg.parse speclist (fun s -> bname := s) usage ; | ||
Secure.set_base_dir (Filename.dirname !bname) ; | ||
if !bname = "" then begin Arg.usage speclist usage ; exit 2 end; | ||
List.iter begin function | ||
| "strings" -> step_strings := true | ||
| "persons" -> step_persons := true | ||
| "families" -> step_families := true | ||
| _ -> Arg.usage speclist usage ; exit 2 | ||
end (String.split_on_char ',' !steps) ; | ||
Lock.control (Mutil.lock_file !bname) false ~onerror:Lock.print_try_again @@ fun () -> | ||
let base = Gwdb1.OfGwdb.base (Gwdb.open_base !bname) in | ||
match !action with | ||
| None -> Arg.usage speclist usage ; exit 2 | ||
| Some `dump -> dump base | ||
| Some `report -> report base |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
(executable | ||
(name compact) | ||
(public_name geneweb-compact) | ||
(libraries unix str geneweb.gwdb1 geneweb.wserver geneweb) | ||
(modules compact) | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
(lang dune 1.10) | ||
(name geneweb-compact) |
Empty file.