diff --git a/.travis.yml b/.travis.yml index 91a9d430e..2d619232c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -82,8 +82,24 @@ jobs: -e TEST_RETRY_COUNT=3 web npm run tests - stage: tests - name: similarity-tests - if: branch = develop OR branch = master OR commit_message =~ /\[full ci\]/ OR commit_message =~ /\[similarity tests\]/ + name: media-similarity-tests + if: branch = develop OR branch = master OR commit_message =~ /\[full ci\]/ OR commit_message =~ /\[media similarity tests\]/ + script: + - docker-compose exec web service nginx start + - docker-compose -f docker-compose.yml -f docker-test.yml exec chromedriver service nginx start + - docker-compose exec + -e IMGUR_CLIENT_ID=$IMGUR_CLIENT_ID + -e TRAVIS_JOB_NAME=$TRAVIS_JOB_NAME + -e TRAVIS_BRANCH=$TRAVIS_BRANCH + -e AWS_ENDPOINT=$AWS_ENDPOINT + -e AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID + -e AWS_SECRET_ACCESS_KEY=$AWS_SECRET_ACCESS_KEY + -e AWS_REGION=$AWS_REGION + -e TEST_RETRY_COUNT=3 + web npm run tests + - stage: tests + name: text-similarity-tests + if: branch = develop OR branch = master OR commit_message =~ /\[full ci\]/ OR commit_message =~ /\[text similarity tests\]/ script: - docker-compose exec web service nginx start - docker-compose -f docker-compose.yml -f docker-test.yml exec chromedriver service nginx start @@ -99,7 +115,7 @@ jobs: web npm run tests - stage: tests name: unit-tests - if: branch != develop AND branch != master AND NOT commit_message =~ /\[full ci\]/ AND NOT commit_message =~ /\[smoke tests\]/ AND NOT commit_message =~ /\[similarity tests\]/ + if: branch != develop AND branch != master AND NOT commit_message =~ /\[full ci\]/ AND NOT commit_message =~ /\[smoke tests\]/ AND NOT commit_message =~ /\[media similarity tests\]/ AND NOT commit_message =~ /\[text similarity tests\]/ script: - docker-compose exec -e TRAVIS_JOB_NAME=$TRAVIS_JOB_NAME web npm run tests after_script: diff --git a/build.sh b/build.sh index 8ae56ac43..67607b74d 100755 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/bin/bash # Running only unit tests -if [[ $TRAVIS_BRANCH != 'develop' && $TRAVIS_BRANCH != 'master' && ! $TRAVIS_COMMIT_MESSAGE =~ \[full\ ci\] && ! $TRAVIS_COMMIT_MESSAGE =~ \[smoke\ tests\] && ! $TRAVIS_COMMIT_MESSAGE =~ \[similarity\ tests\] ]] +if [[ $TRAVIS_BRANCH != 'develop' && $TRAVIS_BRANCH != 'master' && ! $TRAVIS_COMMIT_MESSAGE =~ \[full\ ci\] && ! $TRAVIS_COMMIT_MESSAGE =~ \[smoke\ tests\] && ! $TRAVIS_COMMIT_MESSAGE =~ \[text\ similarity\ tests\] && ! $TRAVIS_COMMIT_MESSAGE =~ \[media\ similarity\ tests\] ]] then echo "Running only unit tests" docker-compose build web @@ -14,34 +14,40 @@ else docker-compose build web api api-background pender pender-background docker-compose -f docker-compose.yml -f docker-test.yml up -d web api api-background pender pender-background chromedriver else - i=0 - NGROK_URL="" - ngrok config add-authtoken $NGROK_AUTH - ngrok config upgrade 2 - while [ -z "$NGROK_URL" -a $i -lt 5 ]; do - i=$(($i + 1)) - ngrok http 9000 >/dev/null & - until curl --silent -I -f --fail http://localhost:4040; do printf "."; sleep 10; done - curl -I -v http://localhost:4040 - curl localhost:4040/api/tunnels > ngrok.json - cat ngrok.json - NGROK_URL=$(grep -Po '"public_url": *\K"[^"]*"' ngrok.json | tail -n1 | sed 's/.\(.*\)/\1/' | sed 's/\(.*\)./\1/') + if [[ $TRAVIS_JOB_NAME == 'media-similarity-tests' ]] + then + i=0 + NGROK_URL="" + ngrok config add-authtoken $NGROK_AUTH + ngrok config upgrade 2 + while [ -z "$NGROK_URL" -a $i -lt 5 ]; do + i=$(($i + 1)) + ngrok http 9000 >/dev/null & + until curl --silent -I -f --fail http://localhost:4040; do printf "."; sleep 10; done + curl -I -v http://localhost:4040 + curl localhost:4040/api/tunnels > ngrok.json + cat ngrok.json + NGROK_URL=$(grep -Po '"public_url": *\K"[^"]*"' ngrok.json | tail -n1 | sed 's/.\(.*\)/\1/' | sed 's/\(.*\)./\1/') + if [ -z $NGROK_URL ] + then + kill -9 $(pgrep ngrok) + fi + sleep 5 + done if [ -z $NGROK_URL ] then - kill -9 $(pgrep ngrok) + echo "Not able to connect a Ngrok Tunnel. Please try again!" + exit 1 fi - sleep 5 - done - if [ -z $NGROK_URL ] - then - echo "Not able to connect a Ngrok Tunnel. Please try again!" - exit 1 + echo "Ngrok tunnel: $NGROK_URL" + sed -i "s~similarity_media_file_url_host: ''~similarity_media_file_url_host: '$NGROK_URL'~g" check-api/config/config.yml + cat check-api/config/config.yml | grep similarity_media_file_url_host + docker-compose build web api api-background pender pender-background chromedriver alegre presto-server presto-audio presto-image presto-video + docker-compose -f docker-compose.yml -f docker-test.yml up -d web api api-background pender pender-background chromedriver alegre presto-server presto-audio presto-image presto-video + else + docker-compose build web api api-background pender pender-background chromedriver alegre presto-server presto-mean-tokens + docker-compose -f docker-compose.yml -f docker-test.yml up -d web api api-background pender pender-background chromedriver alegre presto-server presto-mean-tokens fi - echo "Ngrok tunnel: $NGROK_URL" - sed -i "s~similarity_media_file_url_host: ''~similarity_media_file_url_host: '$NGROK_URL'~g" check-api/config/config.yml - cat check-api/config/config.yml | grep similarity_media_file_url_host - docker-compose build - docker-compose -f docker-compose.yml -f docker-test.yml up -d until curl --silent -I -f --fail http://localhost:3100; do printf .; sleep 1; done until curl --silent -I -f --fail http://localhost:8000/ping; do printf .; sleep 1; done fi diff --git a/docker-compose.yml b/docker-compose.yml index f1b18b2a0..86340bb78 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -187,8 +187,8 @@ services: networks: - dev depends_on: - elasticmq: - condition: service_healthy + - elasticmq + - redis presto-video: build: presto platform: linux/amd64 @@ -204,6 +204,21 @@ services: depends_on: elasticmq: condition: service_healthy + presto-mean-tokens: + build: presto + platform: linux/amd64 + volumes: + - "./presto:/app" + env_file: + - presto/.env_file + environment: + ROLE: worker + MODEL_NAME: mean_tokens.Model + networks: + - dev + depends_on: + elasticmq: + condition: service_healthy alegre: build: alegre ports: @@ -214,9 +229,6 @@ services: - redis - elasticsearch - presto-server - - presto-image - - presto-audio - - presto-video env_file: - alegre/.env_file networks: diff --git a/docker-test.yml b/docker-test.yml index 4d0e04be1..9df241aa1 100644 --- a/docker-test.yml +++ b/docker-test.yml @@ -54,6 +54,9 @@ services: presto-video: environment: BOILERPLATE_ENV: test + presto-mean-tokens: + environment: + BOILERPLATE_ENV: test web: environment: PLATFORM: web diff --git a/nginx.conf b/nginx.conf index 9bc91a861..e6a5d7fdf 100644 --- a/nginx.conf +++ b/nginx.conf @@ -23,5 +23,4 @@ http { proxy_set_header X-Forwarded-Host $server_name; } } - } diff --git a/test/parallel-media-similarity-test.sh b/test/parallel-media-similarity-test.sh new file mode 100755 index 000000000..b3d0110a8 --- /dev/null +++ b/test/parallel-media-similarity-test.sh @@ -0,0 +1,22 @@ +#!/bin/bash + +echo 'Starting tests...' + +FAIL=0 + +bundle exec rspec --tag bin8 spec/integration_spec.rb & + +for job in `jobs -p` +do + echo $job + wait $job || let "FAIL+=1" +done + +echo $FAIL + +if [ "$FAIL" == "0" ]; +then + exit 0 +else + exit 1 +fi \ No newline at end of file diff --git a/test/parallel-similarity-test.sh b/test/parallel-text-similarity-test.sh similarity index 100% rename from test/parallel-similarity-test.sh rename to test/parallel-text-similarity-test.sh diff --git a/test/run-tests.sh b/test/run-tests.sh index 40bbb54c3..7f64f54a9 100755 --- a/test/run-tests.sh +++ b/test/run-tests.sh @@ -10,6 +10,10 @@ elif [[ $TRAVIS_JOB_NAME == 'unit-tests' ]] then npm run test:unit || exit 1 ./../scripts/uncovered-files -else - ./parallel-similarity-test.sh +elif [[ $TRAVIS_JOB_NAME == 'media-similarity-tests' ]] +then + ./parallel-media-similarity-test.sh +elif [[ $TRAVIS_JOB_NAME == 'text-similarity-tests' ]] +then + ./parallel-text-similarity-test.sh fi \ No newline at end of file diff --git a/test/spec/api_helpers.rb b/test/spec/api_helpers.rb index 2affd0767..401ad500c 100644 --- a/test/spec/api_helpers.rb +++ b/test/spec/api_helpers.rb @@ -11,12 +11,21 @@ def request_api(path, params) require 'net/http' uri = URI(api_path + path) uri.query = URI.encode_www_form(params) - response = Net::HTTP.get_response(uri) ret = nil + response = nil begin + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = (uri.scheme == 'https') + http.open_timeout = 120 # Time to open the connection + http.read_timeout = 120 # Time to wait for the response + request = Net::HTTP::Get.new(uri.request_uri) + response = http.request(request) ret = OpenStruct.new JSON.parse(response.body)['data'] + puts "Successful response when calling #{path} with params '#{params.inspect}': #{response.body}" + rescue Net::ReadTimeout + puts "Timeout when calling #{path} with params '#{params.inspect}'" rescue StandardError - print "Failed to parse body of response for endpoint `#{path}`:\n#{response.inspect}\n" unless response.class <= Net::HTTPSuccess + puts "Failed to parse body of response for endpoint #{path}: Response: #{response.inspect} Body: #{response.body}" unless response.class <= Net::HTTPSuccess end ret end @@ -47,8 +56,10 @@ def api_create_team_and_bot(params = {}) user = params[:user] || api_register_and_login_with_email @slug = "test-team-#{Time.now.to_i}-#{rand(10_000).to_i}" team = request_api 'team', { name: "Test Team #{Time.now.to_i}", slug: @slug, email: user.email } - api_install_bot(params[:bot], team[:slug], params[:score]) if params[:bot] sleep 5 + puts "team created: #{team.inspect}" + api_install_bot(params[:bot], team[:slug], params[:score]) if params[:bot] + sleep 10 { user: user, team: team } end @@ -161,9 +172,12 @@ def api_suggest_similarity_between_items(team_id, source, target) end def api_install_bot(bot, slug = nil, settings = {}) + settings ||= { min_es_score: 0 } url = @driver.current_url.to_s team_slug = slug || url.match(%r{^https?://[^/]+/([^/]+)})[1] + puts "Installing bot with settings: #{settings.inspect}" request_api 'install_bot', { bot: bot, slug: team_slug, settings: settings.to_json } + sleep 2 @driver.navigate.to url end diff --git a/test/spec/app_spec_helpers.rb b/test/spec/app_spec_helpers.rb index 8985ad353..8808aa871 100644 --- a/test/spec/app_spec_helpers.rb +++ b/test/spec/app_spec_helpers.rb @@ -161,7 +161,7 @@ def create_media(url, wait_for_creation = true) def create_image(file) # show the side navigation for workspace tipline - wait_for_selector('#side-navigation__tipline-toggle').click + @driver.navigate.to "#{@config['self_url']}/#{@slug}/all-items" wait_for_selector('.projects-list') wait_for_selector('.projects-list__all-items').click wait_for_selector('#create-media-button__open-button').click diff --git a/test/spec/similarity_spec.rb b/test/spec/similarity_spec.rb index b043d4546..6a5cef1a6 100644 --- a/test/spec/similarity_spec.rb +++ b/test/spec/similarity_spec.rb @@ -65,31 +65,48 @@ it 'should identify texts as similar', bin7: true do data = api_create_team_and_bot(bot: 'alegre', score: { min_es_score: 0 }) - pm1 = api_create_claim(data: data, quote: 'Lorem Ipsum is used to generate dummy texts of the printing and TI industry. Lorem Ipsum has been used by the industry for text generation ever since the 1502s.') - sleep 60 # wait for the items to be indexed in the Elasticsearch - api_create_claim(data: data, quote: 'Lorem Ipsum is used to generate dummy texts of the printing and TI industry. Lorem Ipsum has been used by the industry for text generation ever since the 1501s.') - sleep 60 # wait for the items to be indexed in the Elasticsearch - @driver.navigate.to "#{@config['self_url']}/#{data[:team].slug}/media/#{pm1.id}" + pm = api_create_claim(data: data, quote: 'Lorem Ipsum is used to generate dummy texts of the printing and IT industry.') + verbose_wait 3 + api_create_claim(data: data, quote: 'Lorem Ipsum is used to generate dummy texts of the printing and IT industry!') + verbose_wait 3 + @driver.navigate.to "#{@config['self_url']}/#{data[:team].slug}/media/#{pm.id}" wait_for_selector('.media__more-medias') expect(@driver.find_elements(:css, '.media__relationship').size).to eq 1 end - it 'should identify videos as similar', bin7: true do - api_create_team_and_bot(bot: 'alegre') - @driver.navigate.to "#{@config['self_url']}/#{@slug}/settings/workspace" - create_image('files/video.mp4') - verbose_wait 5 - wait_for_selector('.cluster-card') - create_image('files/video2.mp4') - verbose_wait 5 - wait_for_selector('.cluster-card').click + it 'should identify texts as similar with vector search', bin7: true do + data = api_create_team_and_bot(bot: 'alegre', score: { 'master_similarity_enabled' => true, 'text_similarity_enabled' => true, 'text_elasticsearch_matching_threshold' => 0.9, 'text_elasticsearch_suggestion_threshold' => 0.7, 'text_vector_matching_threshold' => 0.95, 'text_vector_suggestion_threshold' => 0.75, 'text_similarity_model' => ['elasticsearch', 'xlm-r-bert-base-nli-stsb-mean-tokens'], 'alegre_model_in_use' => ['elasticsearch', 'xlm-r-bert-base-nli-stsb-mean-tokens'], 'min_es_score' => 100_000 }) + pm = api_create_claim(data: data, quote: 'In January 2003, Batista joined Triple H, Ric Flair and Randy Orton to form the heel stable Evolution.[25] Batista, however, was sidelined for much of 2003 after he tore his right triceps muscle at a Raw live event in a tag team match alongside Orton against The Dudley Boyz.') + verbose_wait 3 + api_create_claim(data: data, quote: 'In January 2003, Batista joined Triple H, Ric Flair and Randy Orton to form the heel stable Evolution.[25] Batista, however, was sidelined for much of 2003 after he tore his right triceps muscle at a Raw live event in a tag team match alongside Orton against The Dudley Boy.') + verbose_wait 3 + @driver.navigate.to "#{@config['self_url']}/#{data[:team].slug}/media/#{pm.id}" + wait_for_selector('.media__more-medias') + expect(@driver.find_elements(:css, '.media__relationship').size).to eq 1 + end + + it 'should identify texts as similar with vector search enabled on two models', bin7: true do + data = api_create_team_and_bot(bot: 'alegre', score: { 'master_similarity_enabled' => true, 'text_similarity_enabled' => true, 'text_elasticsearch_matching_threshold' => 0.9, 'text_elasticsearch_suggestion_threshold' => 0.7, 'text_vector_matching_threshold' => 0.95, 'text_vector_suggestion_threshold' => 0.75, 'text_similarity_model' => ['elasticsearch', 'xlm-r-bert-base-nli-stsb-mean-tokens', 'paraphrase-multilingual-mpnet-base-v2'], 'alegre_model_in_use' => ['elasticsearch', 'xlm-r-bert-base-nli-stsb-mean-tokens', 'paraphrase-multilingual-mpnet-base-v2'], 'min_es_score' => 100_000 }) + pm = api_create_claim(data: data, quote: 'The ends of the warp threads are usually fastened to beams. One end is fastened to one beam, the other end to a second beam, so that the warp threads all lie parallel and are all the same length. The beams are held apart to keep the warp threads taut.') + verbose_wait 3 + api_create_claim(data: data, quote: 'The ends of the warp threads are usually fastened to beams. One end is fastened to one beam, the other end to a second beam, so that the warp threads all lie parallel and are all the same length. The beams are held apart to keep the warp threads tight.') + verbose_wait 3 + @driver.navigate.to "#{@config['self_url']}/#{data[:team].slug}/media/#{pm.id}" wait_for_selector('.media__more-medias') expect(@driver.find_elements(:css, '.media__relationship').size).to eq 1 end - it 'should identify images as similar', bin7: true do + it 'should prepare environment for media similarity tests', bin8: true do + data = api_create_team_and_bot(bot: 'alegre', score: { min_es_score: 0 }) + pm = api_create_claim(data: data, quote: 'Just kicking off Alegre service.') + sleep 60 # Wait for the item to be sent to Alegre + @driver.navigate.to "#{@config['self_url']}/#{data[:team].slug}/media/#{pm.id}" + wait_for_selector('.quote-media-card') + expect(@driver.find_elements(:css, '.quote-media-card').size).to eq 1 + end + + it 'should identify images as similar', bin8: true do api_create_team_and_bot(bot: 'alegre') - @driver.navigate.to "#{@config['self_url']}/#{@slug}/settings/workspace" create_image('files/similarity.jpg') verbose_wait 4 wait_for_selector('.cluster-card') @@ -100,7 +117,7 @@ expect(@driver.find_elements(:css, '.media__relationship').size).to eq 1 end - it 'should extract text from a image', bin7: true do + it 'should extract text from a image', bin8: true do api_create_team_and_bot(bot: 'alegre') @driver.navigate.to "#{@config['self_url']}/#{@slug}/settings/workspace" create_image('files/ocr.png') @@ -111,7 +128,7 @@ expect(@driver.page_source.include?('Test')).to be(true) end - it 'should identify audios as similar', bin7: true do + it 'should identify audios as similar', bin8: true do api_create_team_and_bot(bot: 'alegre') @driver.navigate.to "#{@config['self_url']}/#{@slug}/settings/workspace" create_image('files/audio.mp3') @@ -123,4 +140,16 @@ wait_for_selector('.media__more-medias') expect(@driver.find_elements(:css, '.media__relationship').size).to eq 1 end + + it 'should identify videos as similar', bin8: true do + api_create_team_and_bot(bot: 'alegre') + create_image('files/video.mp4') + verbose_wait 6 + wait_for_selector('.cluster-card') + create_image('files/video2.mp4') + verbose_wait 6 + wait_for_selector('.cluster-card').click + wait_for_selector('.media__more-medias') + expect(@driver.find_elements(:css, '.media__relationship').size).to eq 1 + end end