merge conflict

GRAAL-Research · Jan 9, 2024 · 9ae6f20 · 9ae6f20
2 parents e7ea1ed + e156c29
commit 9ae6f20
Show file tree

Hide file tree

Showing 56 changed files with 387 additions and 359 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -15,7 +15,7 @@
 
 - Added "contributing to"
 - Added fix for comma problem (#56)
-- Added content in Address Parser doc for tags definition
+- Added content in Address Parser documentation for tags definition
 - Fixed Pylint bug with PyTorch 1.6
 - Fixed `pack_padded` cpu error with PyTorch new release
 
@@ -75,15 +75,15 @@
 
 ## 0.3.6
 
-- Added a method for a dict conversion of parsed addresses for simpler `Pandas` integration.
+- Added a method for dictionary conversion of parsed addresses for simpler `Pandas` integration.
 - Added examples for parsing addresses and how to convert them into a DataFrame.
 - Fixed error with download module.
 
 ## 0.4
 
 - Added verbose flag to training and test based on the __init__ of address parser.
 - Added a feature to retrain our models with prediction tags dictionary different from the default one.
-- Added in-doc code examples.
+- Added in-documentation code examples.
 - Added code examples.
 - Small improvement of models implementation.
 
@@ -134,7 +134,7 @@
 ## 0.6.2
 
 - Improved (slightly) code speed of data padding method as per PyTorch list or array to Tensor recommendation.
-- Improved doc for RuntimeError due to retraining FastText and BPEmb model in the same directory.
+- Improved documentation for RuntimeError due to retraining FastText and BPEmb model in the same directory.
 - Added error handling RuntimeError when retraining.
 
 ## 0.6.3
@@ -162,21 +162,21 @@
 ## 0.6.6
 
 - Fixed errors in code examples
-- Improved doc of download_from_url
+- Improved documentation of download_from_url
 - Improve error management of retrain and test
 
 ## 0.6.7
 
 - Fixed errors in data validation
-- Improved doc over data validation
+- Improved documentation over data validation
 - Bugfix data slicing error with data containers
 - Add an example on how to use a retrained model
 
 ## 0.7
 
 - Improved CLI
 - Fixed bug in CLI export dataset
-- Improved the doc of the CLI
+- Improved the documentation of the CLI
 
 ## 0.7.1
 
@@ -208,7 +208,7 @@
   user-given name
 - Hot-fix missing raise for DataError validation of address to parse when address is tuple
 - Bug-fix handling of string column name for CSVDatasetContainer that raised ValueError
-- Improve parse CLI doc and fix error in doc stating JSON format is supported as input data
+- Improve parse CLI documentation and fix error in documentation stating JSON format is supported as input data
 - Add batch_size to parse CLI
 - Add minimum version to Gensim 4.0.0.
 - Add a new CLI function, retrain, to retrain from the command line

diff --git a/README.md b/README.md
@@ -391,7 +391,7 @@ address_parser.retrain(
 
 ### Download Our Models
 
-Here are the URLs to download our pretrained models directly
+Deepparse handles model downloads when you use it, but you can also pre-download our models. Here are the URLs to download our pretrained models directly:
 
 - [FastText](https://graal.ift.ulaval.ca/public/deepparse/fasttext.ckpt),
 - [FastTextAttention](https://graal.ift.ulaval.ca/public/deepparse/fasttext_attention.ckpt),
@@ -400,7 +400,7 @@ Here are the URLs to download our pretrained models directly
 - [FastText Light](https://graal.ift.ulaval.ca/public/deepparse/fasttext.magnitude.gz) (
   using [Magnitude Light](https://github.com/davebulaval/magnitude-light)).
 
-Or you can use our cli to download our pretrained models directly using:
+Or you can use our CLI to download our pretrained models directly using:
 
 ```sh
 download_model <model_name>

diff --git a/deepparse/app/request_examples.http b/deepparse/app/request_examples.http
@@ -13,6 +13,6 @@ POST 0.0.0.0:80/api/parse/bpemb-attention HTTP/1.1
 Content-Type: application/json
 
 [
-  {"address": "16 rue Grande-Place, Victoriaville, QC, G6S 1E6"},
-  {"address": "123 rue Valancourt, Val-Alain, quebec, g9v1s3"}
+  {"address": "16 rue grande-place, victoriaville, qc, g6s 1e6"},
+  {"address": "123 rue valancourt, val-alain, quebec, g9v 1s3"}
 ]
diff --git a/deepparse/cli/download_model.py b/deepparse/cli/download_model.py
@@ -1,13 +1,12 @@
 import argparse
 import sys
 
-
 from deepparse.download_tools import download_model, MODEL_MAPPING_CHOICES
 
 
 def main(args=None) -> None:
     """
-    CLI function to manually download all the dependencies for a pretrained model.
+    CLI function to download all the dependencies for a pretrained model manually.
 
     Example of usage:
 
@@ -41,7 +40,7 @@ def get_parser() -> argparse.ArgumentParser:
         "--saving_cache_dir",
         type=str,
         default=None,
-        help="To change the default saving cache directory (default to None e.g. default path).",
+        help="To change the default saving cache directory (default to None, e.g. default path).",
     )
 
     return parser

diff --git a/deepparse/cli/download_models.py b/deepparse/cli/download_models.py
@@ -6,7 +6,7 @@
 
 def main(args=None) -> None:
     """
-    CLI function to manually download all the dependencies for all pretrained models.
+    CLI function to download all the dependencies for all pretrained models manually.
 
     Example of usage:
 
@@ -34,7 +34,7 @@ def get_parser() -> argparse.ArgumentParser:
         "--saving_cache_dir",
         type=str,
         default=None,
-        help="To change the default saving cache directory (default to None e.g. default path).",
+        help="To change the default saving cache directory (default to None, e.g. default path).",
     )
 
     return parser

diff --git a/deepparse/cli/parse.py b/deepparse/cli/parse.py
@@ -32,15 +32,15 @@
 def main(args=None) -> None:
     # pylint: disable=too-many-locals, too-many-branches
     """
-    CLI function to rapidly parse an addresses dataset and output it in another file.
+    CLI function to easily parse an address dataset and output it in another file.
 
     Examples of usage:
 
     .. code-block:: sh
 
         parse fasttext ./dataset_path.csv parsed_address.pickle
 
-    Using a gpu device
+    Using a GPU device
 
     .. code-block:: sh
 
@@ -119,7 +119,7 @@ def main(args=None) -> None:
 
 
 def get_parser() -> argparse.ArgumentParser:
-    """Return ArgumentParser for the cli."""
+    """Return ArgumentParser for the CLI."""
 
     parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
     parser.add_argument(
@@ -137,11 +137,11 @@ def get_parser() -> argparse.ArgumentParser:
     parser.add_argument(
         "export_filename",
         help=wrap(
-            "The filename to use to export the parsed addresses. We will infer the file format base on the "
+            "The filename to use to export the parsed addresses. We will infer the file format based on the "
             "file extension. That is, if the file is a pickle (.p or .pickle), we will export it into a pickle file. "
-            "The supported format are Pickle, CSV and JSON. "
+            "The supported formats are Pickle, CSV and JSON. "
             "The file will be exported in the same repositories as the dataset_path. "
-            "See the doc for more details on the format exporting."
+            "See the documentation for more details on the format exporting."
         ),
         type=str,
     )

diff --git a/deepparse/cli/parser_arguments_adder.py b/deepparse/cli/parser_arguments_adder.py
@@ -25,7 +25,7 @@ def add_csv_column_name_arg(parser: ArgumentParser) -> None:
     parser.add_argument(
         "--csv_column_name",
         help=wrap(
-            "The column name to extract address in the CSV. Need to be specified if the provided dataset_path "
+            "The column name to extract the address in the CSV. It needs to be specified if the provided dataset_path "
             "leads to a CSV file."
         ),
         type=str,
@@ -37,7 +37,7 @@ def add_csv_column_names_arg(parser: ArgumentParser) -> None:
     parser.add_argument(
         "--csv_column_names",
         help=wrap(
-            "The column names to extract address and tags in the CSV. Need to be specified if the provided "
+            "The column names to extract addresses and tags in the CSV. It needs to be specified if the provided "
             "dataset_path leads to a CSV file. Column names have to be separated by a whitespace. For"
             "example, --csv_column_names column1 column2. By default, None."
         ),

diff --git a/deepparse/cli/retrain.py b/deepparse/cli/retrain.py
@@ -64,7 +64,7 @@ def handle_prediction_tags(parsed_args):
 def main(args=None) -> None:
     # pylint: disable=too-many-locals, too-many-branches
     """
-    CLI function to rapidly retrain an addresses parser and saves it. One can retrain a base pretrained model
+    CLI function to easily retrain an address parser and save it. One can retrain a base pretrained model
     using most of the arguments as the :meth:`~AddressParser.retrain` method. By default, all the parameters have
     the same default value as the :meth:`~AddressParser.retrain` method. The supported parameters are the following:
 
@@ -86,7 +86,7 @@ def main(args=None) -> None:
 
         retrain fasttext ./train_dataset_path.csv
 
-    Using a gpu device
+    Using a GPU device
 
     .. code-block:: sh
 
@@ -142,7 +142,7 @@ def main(args=None) -> None:
 
 
 def get_parser() -> argparse.ArgumentParser:
-    """Return ArgumentParser for the cli."""
+    """Return ArgumentParser for the CLI."""
 
     parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
 
@@ -198,8 +198,8 @@ def get_parser() -> argparse.ArgumentParser:
         "--logging_path",
         help=wrap(
             "The logging path for the checkpoints and the retrained model. "
-            "Note that training creates checkpoints, and we use Poutyne library that use the best epoch "
-            "model and reloads the state if any checkpoints are already there. "
+            "Note that training creates checkpoints, and we use the Poutyne library that uses the best epoch "
+            "model and reload the state if any checkpoints are already there. "
             "Thus, an error will be raised if you change the model type. For example, "
             "you retrain a FastText model and then retrain a BPEmb in the same logging path directory."
             "By default, the path is './checkpoints'."
@@ -241,7 +241,7 @@ def get_parser() -> argparse.ArgumentParser:
         help=wrap(
             "Path to a JSON file of prediction tags to use to retrain. Tags are in a key-value style, where "
             "the key is the tag name, and the value is the index one."
-            "The last element has to be an EOS tag. Read the doc for more detail about EOS tag."
+            "The last element has to be an EOS tag. Read the documentation for more details about the EOS tag."
         ),
         default=None,
         type=str,

diff --git a/deepparse/cli/test.py b/deepparse/cli/test.py
@@ -108,7 +108,7 @@ def main(args=None) -> None:
 
 
 def get_parser() -> argparse.ArgumentParser:
-    """Return ArgumentParser for the cli."""
+    """Return ArgumentParser for the CLI."""
 
     parser = argparse.ArgumentParser(formatter_class=argparse.RawTextHelpFormatter)
 

diff --git a/deepparse/comparer/addresses_comparer.py b/deepparse/comparer/addresses_comparer.py
@@ -10,15 +10,16 @@
 @dataclass(frozen=True)
 class AddressesComparer:
     """
-    Address comparer to compare addresses with each other and retrieves the differences between them. The addresses
-    are parsed using an address parser based on one of the seq2seq pretrained networks either with fastText or BPEmb.
+    Address comparer is used to compare addresses with each other and retrieve the differences between them. The
+    addresses are parsed using an address parser based on one of the seq2seq pretrained networks, either with
+    FastText or BPEmb.
 
-    The address comparer can compare already parsed addresses. The address parser first recompose the raw
-    addresses then suggests its own tags, then it makes a comparison with the tags of the source parsing and the
+    The address comparer can compare already parsed addresses. The address parser first recomposes the raw
+    addresses then suggests its own tags; then it makes a comparison with the tags of the source parsing and the
     newly parsed address
 
     The address comparer is also able to compare raw addresses by first parsing the addresses using the
-    address parser and then brings out the differences among the parsed addresses.
+    address parser and then bringing out the differences among the parsed addresses.
 
 
     Args:
@@ -40,15 +41,15 @@ def compare_tags(
     ) -> Union[List[FormattedComparedAddressesTags], FormattedComparedAddressesTags]:
         """
         Compare tags of a source parsing with the parsing from AddressParser. First, it reconstructs the
-        raw address from the parsing, then AddressParser generates tags and then compares the two parsings.
+        raw address from the parsing, then AddressParser generates tags and compares the two parsings.
 
         Args:
-            addresses_tags_to_compare (Union[List[tuple], List[List[tuple]]]): list of tuple that contains
-            the tags for the address components from the source. Can compare multiples parsings if passed as a
+            addresses_tags_to_compare (Union[List[tuple], List[List[tuple]]]): list of tuples that contain
+            the tags for the address components from the source. Can compare multiple parsings if passed as a
             list of tuples.
-            with_prob (Union[None, bool]): A option flag to either or not include prob in the comparison report.
-                The probabilities are not compared but only included in the report.
-                The default value is None, which means not taking into account.
+            with_prob (Union[None, bool]): An optional flag for whether to include probabilities in the comparison
+                report. The probabilities are not compared but only included in the report. The default value is
+                ``None``, which means not taking into account.
 
         Return:
             Either a :class:`~FormattedComparedAddressesTags` or a list of :class:`~FormattedComparedAddressTags`
@@ -122,16 +123,15 @@ def compare_raw(
         with_prob: Union[None, bool] = None,
     ) -> List[FormattedComparedAddressesRaw]:
         """
-        Compare a list of raw addresses together, it starts by parsing the addresses
-        with the setted parser and then return the differences between the addresses components
-        retrieved with our model.
+        Compare a list of raw addresses together. It starts by parsing the addresses
+        with the parser and then returns the differences between the parsed address components of the two addresses.
 
         Args:
             raw_addresses_to_compare (Union[Tuple[str], List[Tuple[str]]]):
-                List of string that represent raw addresses to compare.
-            with_prob (Union[None, bool]): A option flag to either or not include prob in the comparison report.
-                The probabilities are not compared but only included in the report.
-                The default value is None, which means not taking into account.
+                List of strings that represent raw addresses to compare.
+            with_prob (Union[None, bool]): An optional flag for whether to include probabilities in the comparison
+                report. The probabilities are not compared but only included in the report. The default value is
+                ``None``, which means not taking into account.
 
         Return:
             Either a :class:`~FormattedComparedAddressesRaw` or a list of
@@ -184,8 +184,8 @@ def compare_raw(
     @staticmethod
     def _format_comparisons_dict(comparison_tuples: List, origin_tuple: Tuple[str, str], with_prob: bool) -> List[Dict]:
         """
-        Return formatted dict that contains two FormattedParsedAddress and the origin name tuple and output it in a
-        dict format.
+        Return formatted dictionary that contains two FormattedParsedAddress and the origin name tuple and output it
+        in a dictionary format.
         """
 
         list_of_formatted_comparisons_dict = []