Review notes (text placed before the first "diff --git" header is skipped by
`git apply` and `patch`):

* Workflow: the host-side `cp` step is gone (the runner has no /app, and the
  Dockerfile copies ParaAT.pl inside the image); one commit-SHA tag is built
  and pushed instead of re-evaluating $(date +%s) in a later step; the pushed
  image is namespaced; login and push run on push events only.
* getHGT (both copies): "NA" distances are set to 0 in the 'dist' column only.
* NOTE(review): the image is built from a `git clone` of the repository, so a
  pull-request build presumably packages the cloned default branch rather
  than the checked-out revision -- confirm this is intended.
* Whitespace in context lines was reconstructed from a flattened copy of this
  patch and the "index" blob hashes are carried over unchanged; regenerate
  the patch with `git diff` before applying it.

diff --git a/.github/workflows/docker-image.yml b/.github/workflows/docker-image.yml
new file mode 100644
index 0000000..027e2bc
--- /dev/null
+++ b/.github/workflows/docker-image.yml
@@ -0,0 +1,35 @@
+name: Docker Image CI
+
+on:
+  push:
+    branches: [ "main" ]
+  pull_request:
+    branches: [ "main" ]
+
+jobs:
+
+  build:
+
+    runs-on: ubuntu-latest
+
+    steps:
+    - uses: actions/checkout@v4
+    # ParaAT.pl is copied into place inside the image by the Dockerfile;
+    # the runner has no /app directory, so nothing is copied here.
+    - name: Build the Docker image
+      run: docker build . --file Dockerfile --tag dualhgt:${{ github.sha }}
+    - name: Login to Docker Hub
+      # Secrets are not available to pull requests from forks: publish on push only.
+      if: github.event_name == 'push'
+      uses: docker/login-action@v3
+      with:
+        username: ${{ secrets.DOCKER_USERNAME }}
+        password: ${{ secrets.DOCKER_PASSWORD }}
+    - name: Push the Docker image
+      if: github.event_name == 'push'
+      env:
+        # Docker Hub repositories are namespaced as <user>/<repo>.
+        IMAGE: ${{ secrets.DOCKER_USERNAME }}/dualhgt
+      run: |
+        docker tag dualhgt:${{ github.sha }} "$IMAGE:${{ github.sha }}"
+        docker push "$IMAGE:${{ github.sha }}"
diff --git a/Dockerfile b/Dockerfile
index 2e5280d..f22d225 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -75,14 +75,16 @@ RUN apt-get install -y locate libc6 muscle
 #USER appuser
 
 # Copy the source code into the container.
-COPY . .
+RUN git clone https://github.com/cl3mente/PlantPath2023.git
+
+RUN cp -f /app/PlantPath2023/ParaAT.pl /app/ParaAT2.0/ParaAT.pl
 
 ENV PATH=$PATH:/app/KaKs_Calculator3.0/src:/app/ParaAT2.0:/app/OrthoFinder
 
 RUN mkdir input
-RUN cp -f /app/ParaAT.pl /app/ParaAT2.0/ParaAT.pl
+
 VOLUME /app/input
 
 # CMD python HGT.py -i /data/bioinf2023/PlantPath2023/genomeANDgff -OFr /data/bioinf2023/PlantPath2023/genomeANDgff/results/prot/f7b812/Results_Feb23 -v -nt 50
 
-#ENTRYPOINT ["python","./HGT.py", "-i", "input"]
\ No newline at end of file
+#ENTRYPOINT ["python","./HGT.py", "-i", "input"]
diff --git a/dualHGT.py b/dualHGT.py
index 5de1ece..7b95517 100644
--- a/dualHGT.py
+++ b/dualHGT.py
@@ -771,7 +771,9 @@ def getHGT(matrix, gene_association):
     matrix2 = append_species(matrix, gene_association)
 
-    # remove NAs
-    matrix2 = matrix2[matrix2['dist'] != "NA"]
+    # Keep rows whose distance is "NA" and set that distance to 0. Only the
+    # 'dist' column is written: assigning through a bare boolean mask would
+    # overwrite every column of the matching rows.
+    matrix2.loc[matrix2['dist'] == "NA", 'dist'] = 0
     matrix2['dist'] = pd.to_numeric(matrix2['dist'], downcast='float')
 
     # initialize an empty column for the HGT score
@@ -924,7 +926,7 @@ def prepare_input(arg):
     except FileExistsError:
         print(f"Folder '{output_folder}' already exists. Wait a second??")
 
-    logfile = os.path.join(output_folder, 'log.txt')
+    logfile = os.path.join(output_folder, f'log_{current_date}.txt')
     logfile = open(logfile, 'w')
 
     def write_and_log(msg):
diff --git a/dualHGT/dualHGT/dualHGT.py b/dualHGT/dualHGT/dualHGT.py
index a7db927..84133c4 100644
--- a/dualHGT/dualHGT/dualHGT.py
+++ b/dualHGT/dualHGT/dualHGT.py
@@ -535,7 +535,7 @@ def kaksparallel(file: str) -> list:
     if not os.path.exists(output) or not os.path.getsize(output) > 0:
         runkaks = KAKS % (file, #the .axt file passed as input
                           output, # the output file
-                          "NG") # NG is the model used for the calculation
+                          "MA") # Model Averaging is used for the calculation
         run = subprocess.Popen(runkaks, shell=True, stderr=subprocess.PIPE, stdout=subprocess.PIPE)
         out, err = run.communicate()
 
@@ -771,7 +771,9 @@ def getHGT(matrix, gene_association):
     matrix2 = append_species(matrix, gene_association)
 
-    # remove NAs
-    matrix2 = matrix2[matrix2['dist'] != "NA"]
+    # Keep rows whose distance is "NA" and set that distance to 0. Only the
+    # 'dist' column is written: assigning through a bare boolean mask would
+    # overwrite every column of the matching rows.
+    matrix2.loc[matrix2['dist'] == "NA", 'dist'] = 0
     matrix2['dist'] = pd.to_numeric(matrix2['dist'], downcast='float')
 
     # initialize an empty column for the HGT score