FateScript · Wang-zipeng · Feb 8, 2021 · Feb 8, 2021 · Feb 9, 2021 · Mar 1, 2021
diff --git a/dl_lib/engine/defaults.py b/dl_lib/engine/defaults.py
@@ -13,6 +13,7 @@
 import logging
 import os
 from collections import OrderedDict
+from getpass import getuser
 
 import torch
 from torch.nn.parallel import DistributedDataParallel
@@ -66,7 +67,8 @@ def default_argument_parser():
     # PyTorch still may leave orphan processes in multi-gpu training.
     # Therefore we use a deterministic way to obtain port,
     # so that users are aware of orphan processes by seeing the port occupied.
-    port = 2 ** 15 + 2 ** 14 + hash(os.getuid()) % 2 ** 14
+    uid = os.getuid() if hasattr(os, "getuid") else 1  # os.getuid() is Unix-only
+    port = 2 ** 15 + 2 ** 14 + hash(uid) % 2 ** 14
     parser.add_argument("--dist-url", default="tcp://127.0.0.1:{}".format(port))
     parser.add_argument(
         "opts",

diff --git a/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu b/dl_lib/layers/ROIAlign/ROIAlign_cuda.cu
@@ -307,6 +307,10 @@ __global__ void RoIAlignBackwardFeature(
 
 namespace dl_lib {
 
+// Integer ceiling of a / b; exact for a >= 0, b > 0 (a + b - 1 must fit in int).
+// NOTE(review): no call site is visible in this patch -- confirm it is needed.
+int ceil_div(int a, int b) { return (a + b - 1) / b; }
+
 at::Tensor ROIAlign_forward_cuda(
     const at::Tensor& input,
     const at::Tensor& rois,
@@ -334,7 +338,7 @@ at::Tensor ROIAlign_forward_cuda(
   auto output_size = num_rois * pooled_height * pooled_width * channels;
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  dim3 grid(std::min(at::cuda::ATenCeilDiv(output_size, 512L), 4096L));
+  dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(output_size), static_cast<int64_t>(512)), static_cast<int64_t>(4096)));
   dim3 block(512);
 
   if (output.numel() == 0) {
@@ -390,7 +394,7 @@ at::Tensor ROIAlign_backward_cuda(
 
   cudaStream_t stream = at::cuda::getCurrentCUDAStream();
 
-  dim3 grid(std::min(at::cuda::ATenCeilDiv(grad.numel(), 512L), 4096L));
+  dim3 grid(std::min(at::cuda::ATenCeilDiv(static_cast<int64_t>(grad.numel()), static_cast<int64_t>(512)), static_cast<int64_t>(4096)));
   dim3 block(512);
 
   // handle possibly empty gradients

diff --git a/setup.py b/setup.py
@@ -4,6 +4,7 @@
 
 import glob
 import os
+import platform
 
 import torch
 from setuptools import find_packages, setup
@@ -12,6 +13,7 @@
 torch_ver = [int(x) for x in torch.__version__.split(".")[:2]]
 assert torch_ver >= [1, 3], "Requires PyTorch >= 1.3"
 
+os_name = platform.system()
 
 def get_extensions():
     this_dir = os.path.dirname(os.path.abspath(__file__))
@@ -39,6 +41,8 @@ def get_extensions():
             "-D__CUDA_NO_HALF_CONVERSIONS__",
             "-D__CUDA_NO_HALF2_OPERATORS__",
         ]
+        if "Windows" == os_name:
+            extra_compile_args["nvcc"].append("-D _WIN64")
 
         # It's better if pytorch can do this by default ..
         CC = os.environ.get("CC", None)
@@ -61,13 +65,28 @@ def get_extensions():
 
 
 cur_dir = os.getcwd()
-with open("tools/dl_train", "w") as dl_lib_train:
+
+# Launcher scripts: a batch file on Windows, a bash script on every other
+# platform (the bash launcher used to be written unconditionally, keep that).
+if "Windows" == os_name:
+    dl_train_name = "tools/dl_train.bat"
+    dl_test_name = "tools/dl_test.bat"
+    head = "set OMP_NUM_THREADS=1\n"
+    python_command = "python"
+    parameters = "%*"
+else:
+    dl_train_name = "tools/dl_train"
+    dl_test_name = "tools/dl_test"
     head = f"#!/bin/bash\n\nexport OMP_NUM_THREADS=1\n"
+    python_command = "python3"
+    parameters = "$@"
+
+with open(dl_train_name, "w") as dl_lib_train:
     dl_lib_train.write(
-        head + f"python3 {os.path.join(cur_dir, 'tools', 'train_net.py')} $@")
-with open("tools/dl_test", "w") as dl_lib_test:
+        head + f"{python_command} {os.path.join(cur_dir, 'tools', 'train_net.py')} {parameters}")
+with open(dl_test_name, "w") as dl_lib_test:
     dl_lib_test.write(
-        head + f"python3 {os.path.join(cur_dir, 'tools', 'test_net.py')} $@")
+        head + f"{python_command} {os.path.join(cur_dir, 'tools', 'test_net.py')} {parameters}")
 
 setup(
     name="dl_lib",
@@ -95,5 +114,6 @@ def get_extensions():
     extras_require={"all": ["shapely", "psutil"]},
     ext_modules=get_extensions(),
     cmdclass={"build_ext": torch.utils.cpp_extension.BuildExtension},
-    scripts=["tools/dl_train", "tools/dl_test"],
+    # Install exactly the launcher files generated above for this platform.
+    scripts=[dl_train_name, dl_test_name],
 )
diff --git a/tools/train_net.py b/tools/train_net.py
@@ -19,6 +19,7 @@
 import os
 import sys
 sys.path.insert(0, '.')  # noqa: E402
+import platform
 
 from colorama import Fore, Style
 
@@ -79,13 +80,14 @@ def main(args):
     cfg, logger = default_setup(config, args)
     model = build_model(cfg)
     logger.info(f"Model structure: {model}")
-    file_sys = os.statvfs(cfg.OUTPUT_DIR)
-    free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30
-    # We assume that a single dumped model is 700Mb
-    eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10
-    if eval_space_Gb > free_space_Gb:
-        logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) "
-                       f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}")
+    if hasattr(os, "statvfs"):  # os.statvfs is Unix-only (absent on Windows)
+        file_sys = os.statvfs(cfg.OUTPUT_DIR)
+        free_space_Gb = (file_sys.f_bfree * file_sys.f_frsize) / 2**30
+        # We assume that a single dumped model is 700Mb
+        eval_space_Gb = (cfg.SOLVER.LR_SCHEDULER.MAX_ITER // cfg.SOLVER.CHECKPOINT_PERIOD) * 700 / 2**10
+        if eval_space_Gb > free_space_Gb:
+            logger.warning(f"{Fore.RED}Remaining space({free_space_Gb}GB) "
+                           f"is less than ({eval_space_Gb}GB){Style.RESET_ALL}")
     if args.eval_only:
         DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
             cfg.MODEL.WEIGHTS, resume=args.resume