diff --git a/modules/nf-httpfs/src/main/nextflow/file/http/XFileSystemProvider.groovy b/modules/nf-httpfs/src/main/nextflow/file/http/XFileSystemProvider.groovy index 1fa43da22f..2f00717c2c 100644 --- a/modules/nf-httpfs/src/main/nextflow/file/http/XFileSystemProvider.groovy +++ b/modules/nf-httpfs/src/main/nextflow/file/http/XFileSystemProvider.groovy @@ -16,6 +16,8 @@ package nextflow.file.http +import static nextflow.file.http.XFileSystemConfig.* + import java.nio.ByteBuffer import java.nio.channels.SeekableByteChannel import java.nio.file.AccessDeniedException @@ -44,13 +46,9 @@ import groovy.transform.PackageScope import groovy.util.logging.Slf4j import nextflow.SysEnv import nextflow.extension.FilesEx +import nextflow.file.FileHelper import nextflow.util.InsensitiveMap import sun.net.www.protocol.ftp.FtpURLConnection - -import static XFileSystemConfig.* - -import static nextflow.file.http.XFileSystemConfig.config - /** * Implements a read-only JSR-203 compliant file system provider for http/ftp protocols * @@ -64,6 +62,7 @@ abstract class XFileSystemProvider extends FileSystemProvider { private Map fileSystemMap = new LinkedHashMap<>(20) + private static final int[] REDIRECT_CODES = [301, 302, 307, 308] protected static String config(String name, def defValue) { return SysEnv.containsKey(name) ? SysEnv.get(name) : defValue.toString() @@ -185,18 +184,25 @@ abstract class XFileSystemProvider extends FileSystemProvider { protected URLConnection toConnection0(URL url, int attempt) { final conn = url.openConnection() conn.setRequestProperty("User-Agent", 'Nextflow/httpfs') + if( conn instanceof HttpURLConnection ) { + // by default HttpURLConnection does redirect only within the same host + // disable the built-in to implement custom redirection logic (see below) + conn.setInstanceFollowRedirects(false) + } if( url.userInfo ) { conn.setRequestProperty("Authorization", auth(url.userInfo)); } else { XAuthRegistry.instance.authorize(conn) } - if ( conn instanceof HttpURLConnection && conn.getResponseCode() in [307, 308] && attempt < MAX_REDIRECT_HOPS ) { + if ( conn instanceof HttpURLConnection && conn.getResponseCode() in REDIRECT_CODES && attempt < MAX_REDIRECT_HOPS ) { final header = InsensitiveMap.of(conn.getHeaderFields()) - String location = header.get("Location")?.get(0) - URL newPath = new URI(location).toURL() - log.debug "Remote redirect URL: $newPath" - return toConnection0(newPath, attempt+1) + final location = header.get("Location")?.get(0) + log.debug "Remote redirect location: $location" + final newUrl = new URI(absLocation(location,url)).toURL() + if( url.protocol=='https' && newUrl.protocol=='http' ) + throw new IOException("Refuse to follow redirection from HTTPS to HTTP (unsafe) URL - origin: $url - target: $newUrl") + return toConnection0(newUrl, attempt+1) } else if( conn instanceof HttpURLConnection && conn.getResponseCode() in config().retryCodes() && attempt < config().maxAttempts() ) { final delay = (Math.pow(config().backOffBase(), attempt) as long) * config().backOffDelay() @@ -212,6 +218,18 @@ abstract class XFileSystemProvider extends FileSystemProvider { return conn } + protected String absLocation(String location, URL target) { + assert location, "Missing location argument" + assert target, "Missing target URL argument" + + final base = FileHelper.baseUrl(location) + if( base ) + return location + if( !location.startsWith('/') ) + location = '/' + location + return "${target.protocol}://${target.authority}$location" + } + @Override SeekableByteChannel newByteChannel(Path path, Set options, FileAttribute... attrs) throws IOException { diff --git a/modules/nf-httpfs/src/test/nextflow/file/http/HttpFilesTests.groovy b/modules/nf-httpfs/src/test/nextflow/file/http/HttpFilesTests.groovy index 3090144fc5..041f1f2f3e 100644 --- a/modules/nf-httpfs/src/test/nextflow/file/http/HttpFilesTests.groovy +++ b/modules/nf-httpfs/src/test/nextflow/file/http/HttpFilesTests.groovy @@ -112,7 +112,7 @@ class HttpFilesTests extends Specification { def lines = Files.readAllLines(path, Charset.forName('UTF-8')) then: lines.size()>0 - lines[0] == '' + lines[0] == '' } diff --git a/modules/nf-httpfs/src/test/nextflow/file/http/XFileSystemProviderTest.groovy b/modules/nf-httpfs/src/test/nextflow/file/http/XFileSystemProviderTest.groovy index 9bbabfa06e..8d6e93463c 100644 --- a/modules/nf-httpfs/src/test/nextflow/file/http/XFileSystemProviderTest.groovy +++ b/modules/nf-httpfs/src/test/nextflow/file/http/XFileSystemProviderTest.groovy @@ -21,7 +21,6 @@ import java.nio.file.Path import com.github.tomakehurst.wiremock.junit.WireMockRule import com.github.tomjankes.wiremock.WireMockGroovy -import nextflow.SysEnv import org.junit.Rule import spock.lang.Specification import spock.lang.Unroll @@ -106,7 +105,7 @@ class XFileSystemProviderTest extends Specification { when: def attrs = fsp.readHttpAttributes(path) then: - attrs.lastModifiedTime() == null + attrs.lastModifiedTime() attrs.size() > 0 } @@ -157,6 +156,7 @@ class XFileSystemProviderTest extends Specification { @Rule WireMockRule wireMockRule = new WireMockRule(18080) + @Unroll def 'should follow a redirect when read a http file '() { given: def wireMock = new WireMockGroovy(18080) @@ -180,14 +180,14 @@ class XFileSystemProviderTest extends Specification { response { status HTTP_CODE headers { - "Location" "http://localhost:18080/redirected.html" + "Location" "http://localhost:18080/target.html" } } } wireMock.stub { request { method "GET" - url "/redirected.html" + url "/target.html" } response { status 200 @@ -212,22 +212,36 @@ class XFileSystemProviderTest extends Specification { Files.size(path) == EXPECTED where: - HTTP_CODE | REDIRECT_TO | EXPECTED - 300 | "/redirected.html" | 10 - 300 | "/index2.html" | 10 - - 301 | "/redirected.html" | 10 - 301 | "/index2.html" | 10 + HTTP_CODE | REDIRECT_TO | EXPECTED + 301 | "/target.html" | 10 + 301 | "/index2.html" | 10 - 302 | "/redirected.html" | 10 - 302 | "/index2.html" | 10 + 302 | "/target.html" | 10 + 302 | "/index2.html" | 10 - 307 | "/redirected.html" | 10 - 307 | "/index2.html" | 10 + 307 | "/target.html" | 10 + 307 | "/index2.html" | 10 - 308 | "/redirected.html" | 10 - 308 | "/index2.html" | 10 + 308 | "/target.html" | 10 + 308 | "/index2.html" | 10 //infinite redirect to himself - 308 | "/index.html" | -1 + 308 | "/index.html" | -1 + } + + def 'should normalize location' () { + given: + def provider = Spy(XFileSystemProvider) + + expect: + provider.absLocation(LOCATION, new URL(TARGET)) == EXPECTED + + where: + LOCATION | TARGET | EXPECTED + 'https://this/that' | 'http://foo.com:123' | 'https://this/that' + '/' | 'http://foo.com:123' | 'http://foo.com:123/' + '/this/that' | 'http://foo.com:123' | 'http://foo.com:123/this/that' + '/this/that' | 'http://foo.com:123/abc' | 'http://foo.com:123/this/that' + 'this/that' | 'http://foo.com:123/abc' | 'http://foo.com:123/this/that' + } }