Merge pull request #2178 from broadinstitute/development

Release 1.86.1

jlchang authored Dec 4, 2024
2 parents 2cbc324 + b7a6e83 commit 21c4c5e
Showing 14 changed files with 206 additions and 65 deletions.
14 changes: 7 additions & 7 deletions Gemfile.lock
@@ -255,7 +255,7 @@ GEM
listen (3.5.1)
rb-fsevent (~> 0.10, >= 0.10.3)
rb-inotify (~> 0.9, >= 0.9.10)
loofah (2.22.0)
loofah (2.23.1)
crass (~> 1.0.2)
nokogiri (>= 1.12.0)
macaddr (1.7.2)
@@ -326,11 +326,11 @@ GEM
net-protocol
netrc (0.11.0)
nio4r (2.7.3)
nokogiri (1.16.6-arm64-darwin)
nokogiri (1.16.8-arm64-darwin)
racc (~> 1.4)
nokogiri (1.16.6-x86_64-darwin)
nokogiri (1.16.8-x86_64-darwin)
racc (~> 1.4)
nokogiri (1.16.6-x86_64-linux)
nokogiri (1.16.8-x86_64-linux)
racc (~> 1.4)
oauth2 (1.4.7)
faraday (>= 0.8, < 2.0)
@@ -364,7 +364,7 @@ GEM
public_suffix (5.0.4)
puma (5.6.9)
nio4r (~> 2.0)
racc (1.8.0)
racc (1.8.1)
rack (2.2.9)
rack-brotli (1.1.0)
brotli (>= 0.1.7)
@@ -396,9 +396,9 @@ GEM
activesupport (>= 5.0.0)
minitest
nokogiri (>= 1.6)
rails-html-sanitizer (1.6.0)
rails-html-sanitizer (1.6.1)
loofah (~> 2.21)
nokogiri (~> 1.14)
nokogiri (>= 1.15.7, != 1.16.7, != 1.16.6, != 1.16.5, != 1.16.4, != 1.16.3, != 1.16.2, != 1.16.1, != 1.16.0.rc1, != 1.16.0)
railties (6.1.7.8)
actionpack (= 6.1.7.8)
activesupport (= 6.1.7.8)
6 changes: 4 additions & 2 deletions app/controllers/api/v1/study_files_controller.rb
@@ -402,7 +402,9 @@ def perform_update!(study_file)
end
end

if safe_file_params[:upload].present? && !is_chunked || safe_file_params[:remote_location].present?
if safe_file_params[:upload].present? && !is_chunked ||
safe_file_params[:remote_location].present? ||
study_file.needs_raw_counts_extraction?
complete_upload_process(study_file, parse_on_upload)
end
end
@@ -445,7 +447,7 @@ def chunk
def complete_upload_process(study_file, parse_on_upload)
# set status to uploaded on full create, unless file is parsed and this is just an update
study_file.update!(status: 'uploaded', parse_status: 'unparsed') unless study_file.parsed?
if parse_on_upload
if parse_on_upload || study_file.needs_raw_counts_extraction?
if study_file.parseable?
persist_on_fail = study_file.remote_location.present?
FileParseService.run_parse_job(@study_file, @study, current_api_user, persist_on_fail:)
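For readers tracing the change, here is a condensed sketch of the two methods above as they fit together after this commit (abbreviated; safe_file_params, is_chunked, parse_on_upload, and the rest of the controller are assumed from the existing code and not shown in this hunk):

# Sketch only: the update path now also reaches complete_upload_process when a
# previously parsed AnnData file is flagged as containing raw counts that were
# never extracted.
def perform_update!(study_file)
  # ... existing attribute and upload handling ...
  if safe_file_params[:upload].present? && !is_chunked ||
     safe_file_params[:remote_location].present? ||
     study_file.needs_raw_counts_extraction?
    complete_upload_process(study_file, parse_on_upload)
  end
end

def complete_upload_process(study_file, parse_on_upload)
  # keep the 'parsed' status for files that were already ingested
  study_file.update!(status: 'uploaded', parse_status: 'unparsed') unless study_file.parsed?
  # raw counts extraction forces a parse even when parse_on_upload is false
  if parse_on_upload || study_file.needs_raw_counts_extraction?
    if study_file.parseable?
      persist_on_fail = study_file.remote_location.present?
      FileParseService.run_parse_job(@study_file, @study, current_api_user, persist_on_fail:)
    end
  end
end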
6 changes: 3 additions & 3 deletions app/javascript/components/upload/ExpressionFileForm.jsx
@@ -39,13 +39,13 @@ export default function ExpressionFileForm({
const speciesOptions = fileMenuOptions.species.map(spec => ({ label: spec.common_name, value: spec.id }))
const selectedSpecies = speciesOptions.find(opt => opt.value === file.taxon_id)
const isMtxFile = file.file_type === 'MM Coordinate Matrix'
const rawCountsInfo = file.expression_file_info.is_raw_counts
const isRawCountsFile = rawCountsInfo === 'true' || rawCountsInfo
const rawCountsInfo = file.expression_file_info.is_raw_counts
const isRawCountsFile = rawCountsInfo === 'true' || rawCountsInfo === true
const [showRawCountsUnits, setShowRawCountsUnits] = useState(isRawCountsFile)

const allowedFileExts = isMtxFile ? FileTypeExtensions.mtx : FileTypeExtensions.plainText
let requiredFields = showRawCountsUnits ? RAW_COUNTS_REQUIRED_FIELDS : REQUIRED_FIELDS
const rawCountsRequired = featureFlagState && featureFlagState.raw_counts_required_frontend
const rawCountsRequired = featureFlagState && featureFlagState.raw_counts_required_frontend && !isAnnDataExperience
if (rawCountsRequired && !isRawCountsFile ) {
requiredFields = requiredFields.concat(PROCESSED_ASSOCIATION_FIELD)
}
5 changes: 5 additions & 0 deletions app/lib/file_parse_service.rb
@@ -110,6 +110,11 @@ def self.run_parse_job(study_file, study, user, reparse: false, persist_on_fail:
anndata_file: study_file.gs_url, extract: %w[cluster], obsm_keys: [obsm_key],
file_size: study_file.upload_file_size
)
elsif study_file.needs_raw_counts_extraction?
params_object = AnnDataIngestParameters.new(
anndata_file: study_file.gs_url, extract: %w[raw_counts], obsm_keys: nil,
file_size: study_file.upload_file_size
)
else
params_object = AnnDataIngestParameters.new(
anndata_file: study_file.gs_url, obsm_keys: study_file.ann_data_file_info.obsm_key_names,
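As a usage note, the new branch builds a parameter object that asks the ingest pipeline to extract only the raw counts matrix. A minimal, hypothetical construction (attribute names mirror the hunk above; the bucket URL and size are made up for illustration):

# Hypothetical raw-counts-only extraction parameters, mirroring the new elsif branch
params_object = AnnDataIngestParameters.new(
  anndata_file: 'gs://example-bucket/matrix.h5ad', # hypothetical GS URL
  extract: %w[raw_counts],                         # pull only the raw counts matrix
  obsm_keys: nil,                                  # skip cluster/obsm extraction on this pass
  file_size: 1.megabyte                            # hypothetical upload size
)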
12 changes: 9 additions & 3 deletions app/models/ingest_job.rb
@@ -838,8 +838,14 @@ def launch_anndata_subparse_jobs
job.delay.push_remote_and_launch_ingest
end

# unset anndata_summary flag to allow reporting summary later
study_file.unset_anndata_summary!
# if this was only a raw counts extraction, update parse status
if params_object.extract == %w[raw_counts]
study_file.update(parse_status: 'parsed')
launch_differential_expression_jobs
else
# unset anndata_summary flag to allow reporting summary later unless this is only a raw counts extraction
study_file.unset_anndata_summary!
end
end
end

@@ -1017,7 +1023,7 @@ def log_to_mixpanel
mixpanel_log_props = get_job_analytics
# log job properties to Mixpanel
MetricsService.log(mixpanel_event_name, mixpanel_log_props, user)
report_anndata_summary if study_file.is_viz_anndata? && action != :ingest_anndata
report_anndata_summary if study_file.is_viz_anndata? && !%i[ingest_anndata differential_expression].include?(action)
end

# set a mixpanel event name based on action
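Condensed, the tail of launch_anndata_subparse_jobs now branches on the extraction type (sketch assembled from the hunk above; the earlier job-launching loop is elided, and the reading that differential expression is launched here because it depends on the newly extracted raw counts is an inference, not stated in the diff):

# Sketch: end of launch_anndata_subparse_jobs after this change
if params_object.extract == %w[raw_counts]
  # raw-counts-only pass: the rest of the file was already ingested, so mark it
  # parsed and hand off to differential expression jobs
  study_file.update(parse_status: 'parsed')
  launch_differential_expression_jobs
else
  # normal extraction: clear the flag so the AnnData summary can be reported later
  study_file.unset_anndata_summary!
end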
5 changes: 5 additions & 0 deletions app/models/study_file.rb
@@ -1047,6 +1047,11 @@ def is_raw_counts_file?
!!expression_file_info&.is_raw_counts || !!ann_data_file_info&.has_raw_counts
end

# check for when a user comes back to indicate an AnnData file has raw counts data
def needs_raw_counts_extraction?
is_viz_anndata? && is_raw_counts_file? && parsed? && study.expression_matrix_cells(self, matrix_type: 'raw').empty?
end

def is_anndata?
file_type == 'AnnData'
end
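A hypothetical console walk-through of the new predicate, assuming a visualizable AnnData file that was already parsed and has just been flagged as containing raw counts (the lookup and return values are illustrative, not from the diff):

file = StudyFile.find_by(upload_file_name: 'matrix.h5ad') # hypothetical lookup
file.is_viz_anndata?              # => true, parseable AnnData used for visualization
file.is_raw_counts_file?          # => true, flag switched on by the user after upload
file.parsed?                      # => true, clusters/metadata were already ingested
file.study.expression_matrix_cells(file, matrix_type: 'raw').empty? # => true, no raw cells yet
file.needs_raw_counts_extraction? # => true, so the next file update triggers extraction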
2 changes: 1 addition & 1 deletion config/application.rb
@@ -29,7 +29,7 @@ class Application < Rails::Application
config.middleware.use Rack::Brotli

# Docker image for file parsing via scp-ingest-pipeline
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.37.0'
config.ingest_docker_image = 'gcr.io/broad-singlecellportal-staging/scp-ingest-pipeline:1.37.1'

# Docker image for image pipeline jobs
config.image_pipeline_docker_image = 'gcr.io/broad-singlecellportal-staging/image-pipeline:0.1.0_c2b090043'
6 changes: 3 additions & 3 deletions image-pipeline/yarn.lock
@@ -970,9 +970,9 @@ [email protected]:
node-fetch "2.6.7"

cross-spawn@^6.0.5:
version "6.0.5"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.5.tgz#4a5ec7c64dfae22c3a14124dbacdee846d80cbc4"
integrity sha512-eTVLrBSt7fjbDygz805pMnstIs2VTBNkRm0qxZd+M7A5XDdxVRWO5MxGBXZhjY4cqLYLdtrGqRf8mBPmzwSpWQ==
version "6.0.6"
resolved "https://registry.yarnpkg.com/cross-spawn/-/cross-spawn-6.0.6.tgz#30d0efa0712ddb7eb5a76e1e8721bffafa6b5d57"
integrity sha512-VqCUuhcd1iB+dsv8gxPttb5iZh/D0iubSP21g36KXdEuf6I5JiioesUVjpCdHV9MZRUfVFlvwtIUyPfxo5trtw==
dependencies:
nice-try "^1.0.4"
path-key "^2.0.1"
61 changes: 61 additions & 0 deletions test/api/study_files_controller_test.rb
@@ -291,4 +291,65 @@ class StudyFilesControllerTest < ActionDispatch::IntegrationTest
assert_equal description, study_file.description
assert study_file.parsed?
end

test 'should extract only raw counts from existing AnnData uploads' do
study = FactoryBot.create(:detached_study,
name_prefix: 'AnnData Raw Counts Study',
public: true,
user: @user,
test_array: @@studies_to_clean)
ann_data_file = FactoryBot.create(:ann_data_file,
name: 'matrix.h5ad',
study:,
upload_file_size: 1.megabyte,
cell_input: %w[A B C D],
has_raw_counts: false,
reference_file: false,
annotation_input: [
{ name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
],
coordinate_input: [
{ umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
],
expression_input: {
'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]]
})

assert_not ann_data_file.is_raw_counts_file?
assert_not ann_data_file.needs_raw_counts_extraction?

# test for normal uploads
study_file_attributes = {
study_file: {
expression_file_info_attributes: {
is_raw_counts: true,
units: 'raw counts'
}
}
}
FileParseService.stub :run_parse_job, true do
execute_http_request(:patch, api_v1_study_study_file_path(study_id: study.id, id: ann_data_file.id),
request_payload: study_file_attributes)
assert_response :success
ann_data_file.reload
assert ann_data_file.is_raw_counts_file?
assert ann_data_file.needs_raw_counts_extraction?
end

# test for bucket path uploads, resetting flags first
ann_data_file.expression_file_info.update(is_raw_counts: false)
ann_data_file.ann_data_file_info.update(has_raw_counts: false)
ann_data_file.update(remote_location: 'matrix.h5ad')
assert_not ann_data_file.is_raw_counts_file?
assert_not ann_data_file.needs_raw_counts_extraction?

FileParseService.stub :run_parse_job, true do
execute_http_request(:patch, api_v1_study_study_file_path(study_id: study.id, id: ann_data_file.id),
request_payload: study_file_attributes)
assert_response :success
ann_data_file.reload
assert ann_data_file.is_raw_counts_file?
assert ann_data_file.needs_raw_counts_extraction?
end
end
end
16 changes: 11 additions & 5 deletions test/factories/study_file.rb
@@ -153,7 +153,6 @@
file_type { 'AnnData' }
parse_status { 'parsed' }
transient do
reference_file { true }
# cell_input is an array of all cell names
# e.g. ['cellA', 'cellB', 'cellC']
cell_input { [] }
@@ -173,7 +172,8 @@
# phex: [['cellA', 0.6],['cellB', 6.1], ['cellC', 4.5]]
# }
expression_input { {} }
has_raw_counts { true }
has_raw_counts { expression_input.any? && cell_input.any? }
reference_file { cell_input.empty? && coordinate_input.empty? && annotation_input.empty? && expression_input.empty? }
end
after(:create) do |file, evaluator|
file.build_ann_data_file_info
@@ -194,10 +194,16 @@
end
if evaluator.expression_input.any?
file.build_expression_file_info(library_preparation_protocol: "10x 5' v3")
file.expression_file_info.save
file.ann_data_file_info.has_expression = true
file.ann_data_file_info.has_raw_counts = evaluator.has_raw_counts
%w[raw processed].each do |matrix_type|
if evaluator.has_raw_counts
file.expression_file_info.is_raw_counts = evaluator.has_raw_counts
file.expression_file_info.units = 'raw counts'
file.ann_data_file_info.has_raw_counts = evaluator.has_raw_counts
end
file.expression_file_info.save
matrix_types = %w[processed]
matrix_types << 'raw' if evaluator.has_raw_counts
matrix_types.each do |matrix_type|
FactoryBot.create(:data_array,
array_type: 'cells',
array_index: 0,
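As a usage sketch of the revised factory defaults (derived from the transient block above; the seed values are hypothetical): with expression and cell input present, has_raw_counts now defaults to true, expression_file_info is marked as raw counts with units 'raw counts', and both 'raw' and 'processed' cell arrays are created; passing has_raw_counts: false, as the updated tests do, opts back out, and reference_file now defaults to true only when no inputs are given.

# Hypothetical factory usage under the new defaults (assumes a study built elsewhere)
file = FactoryBot.create(:ann_data_file,
                         name: 'matrix.h5ad',
                         study:,
                         cell_input: %w[A B C],
                         expression_input: { 'gad1' => [['A', 0.5], ['B', 1.2], ['C', 0.0]] })
file.ann_data_file_info.has_raw_counts # => true (derived default)

processed_only = FactoryBot.create(:ann_data_file,
                                   name: 'processed.h5ad',
                                   study:,
                                   has_raw_counts: false,
                                   cell_input: %w[A B C],
                                   expression_input: { 'gad1' => [['A', 0.5], ['B', 1.2], ['C', 0.0]] })
processed_only.ann_data_file_info.has_raw_counts # => false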
80 changes: 40 additions & 40 deletions test/integration/external/fire_cloud_client_test.rb
@@ -335,46 +335,46 @@ def test_create_configuration_template
##

# main groups test - CRUD group & members
def test_create_and_manage_user_groups
skip if @smoke_test
# set group name
group_name = "test-group-#{@random_test_seed}"
puts 'creating group...'
group = @fire_cloud_client.create_user_group(group_name)
assert group.present?, 'Did not create user group'

puts 'adding user to group...'
user_role = FireCloudClient::USER_GROUP_ROLES.sample
user_added = @fire_cloud_client.add_user_to_group(group_name, user_role, @test_email)
assert user_added, 'Did not add user to group'

puts 'getting user groups...'
groups = @fire_cloud_client.get_user_groups
assert groups.any?, 'Did not find any user groups'

puts 'getting user group...'
group = @fire_cloud_client.get_user_group(group_name)
assert group.present?, "Did not retrieve user group: #{group_name}"
email_key = user_role == 'admin' ? 'adminsEmails' : 'membersEmails'
assert group[email_key].include?(@test_email), "Test group did not have #{@test_email} as member of #{email_key}: #{group[email_key]}"

puts 'delete user from group...'
delete_user = @fire_cloud_client.delete_user_from_group(group_name, user_role, @test_email)
assert delete_user, 'Did not delete user from group'

puts 'confirming user delete...'
updated_group = @fire_cloud_client.get_user_group(group_name)
assert !updated_group[email_key].include?(@test_email), "Test group did still has #{@test_email} as member of #{email_key}: #{updated_group[email_key]}"

puts 'deleting user group...'
delete_group = @fire_cloud_client.delete_user_group(group_name)
assert delete_group, 'Did not delete user group'

puts 'confirming user group delete...'
updated_groups = @fire_cloud_client.get_user_groups
group_names = updated_groups.map {|g| g['groupName']}
assert !group_names.include?(group_name), "Test group '#{group_name}' was not deleted: #{group_names.join(', ')}"
end
# def test_create_and_manage_user_groups
# skip if @smoke_test
# # set group name
# group_name = "test-group-#{@random_test_seed}"
# puts 'creating group...'
# group = @fire_cloud_client.create_user_group(group_name)
# assert group.present?, 'Did not create user group'
#
# puts 'adding user to group...'
# user_role = FireCloudClient::USER_GROUP_ROLES.sample
# user_added = @fire_cloud_client.add_user_to_group(group_name, user_role, @test_email)
# assert user_added, 'Did not add user to group'
#
# puts 'getting user groups...'
# groups = @fire_cloud_client.get_user_groups
# assert groups.any?, 'Did not find any user groups'
#
# puts 'getting user group...'
# group = @fire_cloud_client.get_user_group(group_name)
# assert group.present?, "Did not retrieve user group: #{group_name}"
# email_key = user_role == 'admin' ? 'adminsEmails' : 'membersEmails'
# assert group[email_key].include?(@test_email), "Test group did not have #{@test_email} as member of #{email_key}: #{group[email_key]}"
#
# puts 'delete user from group...'
# delete_user = @fire_cloud_client.delete_user_from_group(group_name, user_role, @test_email)
# assert delete_user, 'Did not delete user from group'
#
# puts 'confirming user delete...'
# updated_group = @fire_cloud_client.get_user_group(group_name)
# assert !updated_group[email_key].include?(@test_email), "Test group did still has #{@test_email} as member of #{email_key}: #{updated_group[email_key]}"
#
# puts 'deleting user group...'
# delete_group = @fire_cloud_client.delete_user_group(group_name)
# assert delete_group, 'Did not delete user group'
#
# puts 'confirming user group delete...'
# updated_groups = @fire_cloud_client.get_user_groups
# group_names = updated_groups.map {|g| g['groupName']}
# assert !group_names.include?(group_name), "Test group '#{group_name}' was not deleted: #{group_names.join(', ')}"
# end

##
#
1 change: 1 addition & 0 deletions test/models/ann_data_file_info_test.rb
@@ -191,6 +191,7 @@ def generate_id
ann_data_file = FactoryBot.create(:ann_data_file,
name: 'test.h5ad',
study:,
has_raw_counts: false,
cell_input: %w[bar bing],
expression_input: {
'foo' => [['bar', 0.3], ['bing', 1.0]]
16 changes: 15 additions & 1 deletion test/models/ingest_job_test.rb
@@ -798,7 +798,21 @@ class IngestJobTest < ActiveSupport::TestCase
assert_equal 1, study.cell_metadata.count
assert_equal 1, study.genes.count

job = IngestJob.new(pipeline_name: SecureRandom.uuid, study:, study_file:, user: @user, action: :ingest_subsample)
study_file.set_anndata_summary!
study_file.reload
safe_fragment = study_file.ann_data_file_info.fragments_by_type(:cluster).first.with_indifferent_access
cluster = study.cluster_groups.first
cluster_file = RequestUtils.data_fragment_url(
study_file, 'cluster', file_type_detail: safe_fragment[:obsm_key_name]
)
cell_metadata_file = RequestUtils.data_fragment_url(study_file, 'metadata')
params_object = AnnDataIngestParameters.new(
subsample: true, ingest_anndata: false, extract: nil, obsm_keys: nil, name: cluster.name,
cluster_file:, cell_metadata_file:
)
job = IngestJob.new(
pipeline_name: SecureRandom.uuid, study:, study_file:, user: @user, action: :ingest_subsample, params_object:
)
mock = Minitest::Mock.new
2.times do
mock.expect :error, nil