Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add support for train on windows #37

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dl_lib/engine/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def default_argument_parser():
# PyTorch still may leave orphan processes in multi-gpu training.
# Therefore we use a deterministic way to obtain port,
# so that users are aware of orphan processes by seeing the port occupied.
port = 2 ** 15 + 2 ** 14 + hash(os.getuid()) % 2 ** 14
port = 2 ** 15 + 2 ** 14 + hash("User_name") % 2 ** 14
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hash("User_name") is a fixed value; please don't do that.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I know it's a fixed value, but I think it is practically impossible to train on an 8-GPU Windows machine anyway. I will find a way to obtain a user id on Windows.

parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port))
parser.add_argument(
"opts",
Expand Down
8 changes: 6 additions & 2 deletions dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,10 @@ __global__ void RoIAlignBackwardFeature(

namespace dl_lib {

// Integer ceiling division: smallest int >= a / b, for non-negative a and
// positive b.
//
// Local replacement for at::cuda::ATenCeilDiv(x, 512L): the 'L'-suffixed
// literal makes the call long-typed, which breaks the build on Windows
// (LLP64, where long is 32-bit) — see the PR discussion.
//
// Written as quotient-plus-remainder instead of (a + b - 1) / b so it
// cannot overflow when a is close to INT_MAX (e.g. a large tensor numel).
int ceil_div(int a, int b) {
  return a / b + (a % b != 0 ? 1 : 0);
}

at::Tensor ROIAlign_forward_cuda(
const at::Tensor& input,
const at::Tensor& rois,
Expand Down Expand Up @@ -334,7 +338,7 @@ at::Tensor ROIAlign_forward_cuda(
auto output_size = num_rois * pooled_height * pooled_width * channels;
cudaStream_t stream = at::cuda::getCurrentCUDAStream();

dim3 grid(std::min(at::cuda::ATenCeilDiv(output_size, 512L), 4096L));
dim3 grid(std::min(ceil_div((int)output_size, 512), 4096));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

at::cuda::ATenCeilDiv works on all platforms; the real reason it fails to compile on Windows is the 'L' (long) literal suffix — `long` is only 32-bit on Windows, so the long-typed overload mismatches.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I will change it and try to recompile.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I remove the "L" suffix, would this function still run correctly on Linux? Could I simply drop the "L"?

dim3 block(512);

if (output.numel() == 0) {
Expand Down Expand Up @@ -390,7 +394,7 @@ at::Tensor ROIAlign_backward_cuda(

cudaStream_t stream = at::cuda::getCurrentCUDAStream();

dim3 grid(std::min(at::cuda::ATenCeilDiv(grad.numel(), 512L), 4096L));
dim3 grid(std::min(ceil_div((int)grad.numel(), 512), 4096));
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ditto

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Same as last one.

dim3 block(512);

// handle possibly empty gradients
Expand Down
4 changes: 2 additions & 2 deletions playground/centernet.res18.coco.512size/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@
SOLVER=dict(
OPTIMIZER=dict(
NAME="SGD",
BASE_LR=0.02,
BASE_LR=0.002,
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please do not change this, thanks.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

0.02 is too large for a single GPU; I will change it back.

WEIGHT_DECAY=1e-4,
),
LR_SCHEDULER=dict(
Expand All @@ -61,7 +61,7 @@
MAX_ITER=126000,
WARMUP_ITERS=1000,
),
IMS_PER_BATCH=128,
IMS_PER_BATCH=16,
),
OUTPUT_DIR=osp.join(
'/data/Outputs/model_logs/playground',
Expand Down
126 changes: 126 additions & 0 deletions playground/centernet.res18.coco.512size/train_net.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This file duplicates tools/train_net.py; you should consider combining them.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, I will try to use the same training flow as on Linux.

# Modified by Feng Wang
"""
Detection Training Script.

This scripts reads a given config file and runs the training or evaluation.
It is an entry point that is made to train standard models in dl_lib.

In order to let one script support training of many models,
this script contains logic that are specific to these built-in models and therefore
may not be suitable for your own project.
For example, your research project perhaps only needs a single "evaluator".

Therefore, we recommend you to use dl_lib as an library and take
this file as an example of how to use the library.
You may want to write your own script with your datasets and other customizations.
"""

import os
import sys
sys.path.insert(0, '.') # noqa: E402

from colorama import Fore, Style

import dl_lib.utils.comm as comm
from config import config
from dl_lib.checkpoint import DetectionCheckpointer
from dl_lib.data import MetadataCatalog
from dl_lib.engine import (DefaultTrainer, default_argument_parser,
default_setup, hooks, launch)
from dl_lib.evaluation import (COCOEvaluator, DatasetEvaluators,
PascalVOCDetectionEvaluator, verify_results)
from net import build_model


class Trainer(DefaultTrainer):
    """Project trainer built on dl_lib's ``DefaultTrainer``.

    ``DefaultTrainer`` bundles the standard training workflow. It may not
    fit a new research project; in that case prefer the leaner
    ``SimpleTrainer`` or a hand-written training loop.
    """

    @classmethod
    def build_evaluator(cls, cfg, dataset_name, output_folder=None):
        """Build the evaluator(s) registered for ``dataset_name``.

        Dispatches on the dataset's ``evaluator_type`` metadata. For your
        own dataset you can simply construct an evaluator directly instead
        of relying on this if-else dispatch.

        Args:
            cfg: project config node.
            dataset_name: name registered in ``MetadataCatalog``.
            output_folder: where to dump results; defaults to
                ``<OUTPUT_DIR>/inference``.

        Returns:
            A single evaluator, or a ``DatasetEvaluators`` wrapper when
            several apply.

        Raises:
            NotImplementedError: if no evaluator matches the dataset type.
        """
        folder = output_folder
        if folder is None:
            folder = os.path.join(cfg.OUTPUT_DIR, "inference")
        kind = MetadataCatalog.get(dataset_name).evaluator_type

        # Pascal VOC has a dedicated evaluator and returns immediately.
        if kind == "pascal_voc":
            return PascalVOCDetectionEvaluator(dataset_name)

        evaluators = []
        if kind in ("coco", "coco_panoptic_seg"):
            evaluators.append(
                COCOEvaluator(
                    dataset_name, cfg, True,
                    folder, dump=cfg.GLOBAL.DUMP_TRAIN,
                )
            )

        if not evaluators:
            raise NotImplementedError(
                "no Evaluator for the dataset {} with the type {}".format(
                    dataset_name, kind
                )
            )
        if len(evaluators) == 1:
            return evaluators[0]
        return DatasetEvaluators(evaluators)


def main(args):
    """Per-process entry point for training or evaluation.

    Merges command-line overrides into the global config, builds the model,
    then either evaluates only (``--eval-only``) or trains to completion.

    Args:
        args: namespace produced by ``default_argument_parser``.

    Returns:
        Evaluation results dict in eval-only mode; otherwise the value of
        ``Trainer.train()``.
    """
    config.merge_from_list(args.opts)
    cfg, logger = default_setup(config, args)

    net = build_model(cfg)
    logger.info(f"Model structure: {net}")

    # Free-disk-space sanity check, currently disabled — presumably because
    # os.statvfs is POSIX-only and unavailable on Windows (confirm):
    # file_sys = os.statvfs(cfg.OUTPUT_DIR)
    # free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30
    # # We assume that a single dumped model is 700Mb
    # eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10
    # if eval_space_Gb > free_space_Gb:
    #     logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) "
    #                    f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}")

    if args.eval_only:
        # Evaluation-only path: load weights, test, optionally add TTA results.
        DetectionCheckpointer(net, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume
        )
        results = Trainer.test(cfg, net)
        if comm.is_main_process():
            verify_results(cfg, results)
        if cfg.TEST.AUG.ENABLED:
            results.update(Trainer.test_with_TTA(cfg, net))
        return results

    # Standard training path. For anything fancier than the default logic,
    # write your own loop or subclass the trainer.
    runner = Trainer(cfg, net)
    runner.resume_or_load(resume=args.resume)
    if cfg.TEST.AUG.ENABLED:
        runner.register_hooks(
            [hooks.EvalHook(0, lambda: runner.test_with_TTA(cfg, runner.model))]
        )
    return runner.train()


if __name__ == "__main__":
    # Parse CLI flags, link the output directory, then fan out one `main`
    # per worker via the distributed launcher.
    cli_args = default_argument_parser().parse_args()
    print("soft link to {}".format(config.OUTPUT_DIR))
    config.link_log()
    print("Command Line Args:", cli_args)
    launch(
        main,
        cli_args.num_gpus,
        num_machines=cli_args.num_machines,
        machine_rank=cli_args.machine_rank,
        dist_url=cli_args.dist_url,
        args=(cli_args,),
    )
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def get_extensions():
"-D__CUDA_NO_HALF_OPERATORS__",
"-D__CUDA_NO_HALF_CONVERSIONS__",
"-D__CUDA_NO_HALF2_OPERATORS__",
"-D _WIN64",
]

# It's better if pytorch can do this by default ..
Expand Down