Skip to content

Commit

Permalink
Merge pull request #2175 from broadinstitute/jb-anndata-raw-counts-fr…
Browse files Browse the repository at this point in the history
…ontend

Allow updates in AnnData expression form, extract raw counts if specified (SCP-5871)
  • Loading branch information
bistline authored Dec 3, 2024
2 parents 288aacc + 43d77c0 commit 3234c37
Show file tree
Hide file tree
Showing 11 changed files with 195 additions and 54 deletions.
6 changes: 4 additions & 2 deletions app/controllers/api/v1/study_files_controller.rb
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,9 @@ def perform_update!(study_file)
end
end

if safe_file_params[:upload].present? && !is_chunked || safe_file_params[:remote_location].present?
if safe_file_params[:upload].present? && !is_chunked ||
safe_file_params[:remote_location].present? ||
study_file.needs_raw_counts_extraction?
complete_upload_process(study_file, parse_on_upload)
end
end
Expand Down Expand Up @@ -445,7 +447,7 @@ def chunk
def complete_upload_process(study_file, parse_on_upload)
# set status to uploaded on full create, unless file is parsed and this is just an update
study_file.update!(status: 'uploaded', parse_status: 'unparsed') unless study_file.parsed?
if parse_on_upload
if parse_on_upload || study_file.needs_raw_counts_extraction?
if study_file.parseable?
persist_on_fail = study_file.remote_location.present?
FileParseService.run_parse_job(@study_file, @study, current_api_user, persist_on_fail:)
Expand Down
6 changes: 3 additions & 3 deletions app/javascript/components/upload/ExpressionFileForm.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ export default function ExpressionFileForm({
const speciesOptions = fileMenuOptions.species.map(spec => ({ label: spec.common_name, value: spec.id }))
const selectedSpecies = speciesOptions.find(opt => opt.value === file.taxon_id)
const isMtxFile = file.file_type === 'MM Coordinate Matrix'
const rawCountsInfo = file.expression_file_info.is_raw_counts
const isRawCountsFile = rawCountsInfo === 'true' || rawCountsInfo
const rawCountsInfo = file.expression_file_info.is_raw_counts
const isRawCountsFile = rawCountsInfo === 'true' || rawCountsInfo === true
const [showRawCountsUnits, setShowRawCountsUnits] = useState(isRawCountsFile)

const allowedFileExts = isMtxFile ? FileTypeExtensions.mtx : FileTypeExtensions.plainText
let requiredFields = showRawCountsUnits ? RAW_COUNTS_REQUIRED_FIELDS : REQUIRED_FIELDS
const rawCountsRequired = featureFlagState && featureFlagState.raw_counts_required_frontend
const rawCountsRequired = featureFlagState && featureFlagState.raw_counts_required_frontend && !isAnnDataExperience
if (rawCountsRequired && !isRawCountsFile ) {
requiredFields = requiredFields.concat(PROCESSED_ASSOCIATION_FIELD)
}
Expand Down
5 changes: 5 additions & 0 deletions app/lib/file_parse_service.rb
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,11 @@ def self.run_parse_job(study_file, study, user, reparse: false, persist_on_fail:
anndata_file: study_file.gs_url, extract: %w[cluster], obsm_keys: [obsm_key],
file_size: study_file.upload_file_size
)
elsif study_file.needs_raw_counts_extraction?
params_object = AnnDataIngestParameters.new(
anndata_file: study_file.gs_url, extract: %w[raw_counts], obsm_keys: nil,
file_size: study_file.upload_file_size
)
else
params_object = AnnDataIngestParameters.new(
anndata_file: study_file.gs_url, obsm_keys: study_file.ann_data_file_info.obsm_key_names,
Expand Down
12 changes: 9 additions & 3 deletions app/models/ingest_job.rb
Original file line number Diff line number Diff line change
Expand Up @@ -838,8 +838,14 @@ def launch_anndata_subparse_jobs
job.delay.push_remote_and_launch_ingest
end

# unset anndata_summary flag to allow reporting summary later
study_file.unset_anndata_summary!
# if this was only a raw counts extraction, update parse status
if params_object.extract == %w[raw_counts]
study_file.update(parse_status: 'parsed')
launch_differential_expression_jobs
else
# unset anndata_summary flag to allow reporting summary later unless this is only a raw counts extraction
study_file.unset_anndata_summary!
end
end
end

Expand Down Expand Up @@ -1017,7 +1023,7 @@ def log_to_mixpanel
mixpanel_log_props = get_job_analytics
# log job properties to Mixpanel
MetricsService.log(mixpanel_event_name, mixpanel_log_props, user)
report_anndata_summary if study_file.is_viz_anndata? && action != :ingest_anndata
report_anndata_summary if study_file.is_viz_anndata? && !%i[ingest_anndata differential_expression].include?(action)
end

# set a mixpanel event name based on action
Expand Down
5 changes: 5 additions & 0 deletions app/models/study_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -1047,6 +1047,11 @@ def is_raw_counts_file?
!!expression_file_info&.is_raw_counts || !!ann_data_file_info&.has_raw_counts
end

# check for when a user comes back to indicate an AnnData file has raw counts data
def needs_raw_counts_extraction?
is_viz_anndata? && is_raw_counts_file? && parsed? && study.expression_matrix_cells(self, matrix_type: 'raw').empty?
end

def is_anndata?
file_type == 'AnnData'
end
Expand Down
61 changes: 61 additions & 0 deletions test/api/study_files_controller_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -291,4 +291,65 @@ class StudyFilesControllerTest < ActionDispatch::IntegrationTest
assert_equal description, study_file.description
assert study_file.parsed?
end

test 'should extract only raw counts from existing AnnData uploads' do
study = FactoryBot.create(:detached_study,
name_prefix: 'AnnData Raw Counts Study',
public: true,
user: @user,
test_array: @@studies_to_clean)
ann_data_file = FactoryBot.create(:ann_data_file,
name: 'matrix.h5ad',
study:,
upload_file_size: 1.megabyte,
cell_input: %w[A B C D],
has_raw_counts: false,
reference_file: false,
annotation_input: [
{ name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
],
coordinate_input: [
{ umap: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
],
expression_input: {
'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]]
})

assert_not ann_data_file.is_raw_counts_file?
assert_not ann_data_file.needs_raw_counts_extraction?

# test for normal uploads
study_file_attributes = {
study_file: {
expression_file_info_attributes: {
is_raw_counts: true,
units: 'raw counts'
}
}
}
FileParseService.stub :run_parse_job, true do
execute_http_request(:patch, api_v1_study_study_file_path(study_id: study.id, id: ann_data_file.id),
request_payload: study_file_attributes)
assert_response :success
ann_data_file.reload
assert ann_data_file.is_raw_counts_file?
assert ann_data_file.needs_raw_counts_extraction?
end

# test for bucket path uploads, resetting flags first
ann_data_file.expression_file_info.update(is_raw_counts: false)
ann_data_file.ann_data_file_info.update(has_raw_counts: false)
ann_data_file.update(remote_location: 'matrix.h5ad')
assert_not ann_data_file.is_raw_counts_file?
assert_not ann_data_file.needs_raw_counts_extraction?

FileParseService.stub :run_parse_job, true do
execute_http_request(:patch, api_v1_study_study_file_path(study_id: study.id, id: ann_data_file.id),
request_payload: study_file_attributes)
assert_response :success
ann_data_file.reload
assert ann_data_file.is_raw_counts_file?
assert ann_data_file.needs_raw_counts_extraction?
end
end
end
16 changes: 11 additions & 5 deletions test/factories/study_file.rb
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,6 @@
file_type { 'AnnData' }
parse_status { 'parsed' }
transient do
reference_file { true }
# cell_input is an array of all cell names
# e.g. ['cellA', 'cellB', 'cellC']
cell_input { [] }
Expand All @@ -173,7 +172,8 @@
# phex: [['cellA', 0.6],['cellB', 6.1], ['cellC', 4.5]]
# }
expression_input { {} }
has_raw_counts { true }
has_raw_counts { expression_input.any? && cell_input.any? }
reference_file { cell_input.empty? && coordinate_input.empty? && annotation_input.empty? && expression_input.empty? }
end
after(:create) do |file, evaluator|
file.build_ann_data_file_info
Expand All @@ -194,10 +194,16 @@
end
if evaluator.expression_input.any?
file.build_expression_file_info(library_preparation_protocol: "10x 5' v3")
file.expression_file_info.save
file.ann_data_file_info.has_expression = true
file.ann_data_file_info.has_raw_counts = evaluator.has_raw_counts
%w[raw processed].each do |matrix_type|
if evaluator.has_raw_counts
file.expression_file_info.is_raw_counts = evaluator.has_raw_counts
file.expression_file_info.units = 'raw counts'
file.ann_data_file_info.has_raw_counts = evaluator.has_raw_counts
end
file.expression_file_info.save
matrix_types = %w[processed]
matrix_types << 'raw' if evaluator.has_raw_counts
matrix_types.each do |matrix_type|
FactoryBot.create(:data_array,
array_type: 'cells',
array_index: 0,
Expand Down
80 changes: 40 additions & 40 deletions test/integration/external/fire_cloud_client_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -335,46 +335,46 @@ def test_create_configuration_template
##

# main groups test - CRUD group & members
def test_create_and_manage_user_groups
skip if @smoke_test
# set group name
group_name = "test-group-#{@random_test_seed}"
puts 'creating group...'
group = @fire_cloud_client.create_user_group(group_name)
assert group.present?, 'Did not create user group'

puts 'adding user to group...'
user_role = FireCloudClient::USER_GROUP_ROLES.sample
user_added = @fire_cloud_client.add_user_to_group(group_name, user_role, @test_email)
assert user_added, 'Did not add user to group'

puts 'getting user groups...'
groups = @fire_cloud_client.get_user_groups
assert groups.any?, 'Did not find any user groups'

puts 'getting user group...'
group = @fire_cloud_client.get_user_group(group_name)
assert group.present?, "Did not retrieve user group: #{group_name}"
email_key = user_role == 'admin' ? 'adminsEmails' : 'membersEmails'
assert group[email_key].include?(@test_email), "Test group did not have #{@test_email} as member of #{email_key}: #{group[email_key]}"

puts 'delete user from group...'
delete_user = @fire_cloud_client.delete_user_from_group(group_name, user_role, @test_email)
assert delete_user, 'Did not delete user from group'

puts 'confirming user delete...'
updated_group = @fire_cloud_client.get_user_group(group_name)
assert !updated_group[email_key].include?(@test_email), "Test group did still has #{@test_email} as member of #{email_key}: #{updated_group[email_key]}"

puts 'deleting user group...'
delete_group = @fire_cloud_client.delete_user_group(group_name)
assert delete_group, 'Did not delete user group'

puts 'confirming user group delete...'
updated_groups = @fire_cloud_client.get_user_groups
group_names = updated_groups.map {|g| g['groupName']}
assert !group_names.include?(group_name), "Test group '#{group_name}' was not deleted: #{group_names.join(', ')}"
end
# def test_create_and_manage_user_groups
# skip if @smoke_test
# # set group name
# group_name = "test-group-#{@random_test_seed}"
# puts 'creating group...'
# group = @fire_cloud_client.create_user_group(group_name)
# assert group.present?, 'Did not create user group'
#
# puts 'adding user to group...'
# user_role = FireCloudClient::USER_GROUP_ROLES.sample
# user_added = @fire_cloud_client.add_user_to_group(group_name, user_role, @test_email)
# assert user_added, 'Did not add user to group'
#
# puts 'getting user groups...'
# groups = @fire_cloud_client.get_user_groups
# assert groups.any?, 'Did not find any user groups'
#
# puts 'getting user group...'
# group = @fire_cloud_client.get_user_group(group_name)
# assert group.present?, "Did not retrieve user group: #{group_name}"
# email_key = user_role == 'admin' ? 'adminsEmails' : 'membersEmails'
# assert group[email_key].include?(@test_email), "Test group did not have #{@test_email} as member of #{email_key}: #{group[email_key]}"
#
# puts 'delete user from group...'
# delete_user = @fire_cloud_client.delete_user_from_group(group_name, user_role, @test_email)
# assert delete_user, 'Did not delete user from group'
#
# puts 'confirming user delete...'
# updated_group = @fire_cloud_client.get_user_group(group_name)
# assert !updated_group[email_key].include?(@test_email), "Test group did still has #{@test_email} as member of #{email_key}: #{updated_group[email_key]}"
#
# puts 'deleting user group...'
# delete_group = @fire_cloud_client.delete_user_group(group_name)
# assert delete_group, 'Did not delete user group'
#
# puts 'confirming user group delete...'
# updated_groups = @fire_cloud_client.get_user_groups
# group_names = updated_groups.map {|g| g['groupName']}
# assert !group_names.include?(group_name), "Test group '#{group_name}' was not deleted: #{group_names.join(', ')}"
# end

##
#
Expand Down
1 change: 1 addition & 0 deletions test/models/ann_data_file_info_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def generate_id
ann_data_file = FactoryBot.create(:ann_data_file,
name: 'test.h5ad',
study:,
has_raw_counts: false,
cell_input: %w[bar bing],
expression_input: {
'foo' => [['bar', 0.3], ['bing', 1.0]]
Expand Down
16 changes: 15 additions & 1 deletion test/models/ingest_job_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -798,7 +798,21 @@ class IngestJobTest < ActiveSupport::TestCase
assert_equal 1, study.cell_metadata.count
assert_equal 1, study.genes.count

job = IngestJob.new(pipeline_name: SecureRandom.uuid, study:, study_file:, user: @user, action: :ingest_subsample)
study_file.set_anndata_summary!
study_file.reload
safe_fragment = study_file.ann_data_file_info.fragments_by_type(:cluster).first.with_indifferent_access
cluster = study.cluster_groups.first
cluster_file = RequestUtils.data_fragment_url(
study_file, 'cluster', file_type_detail: safe_fragment[:obsm_key_name]
)
cell_metadata_file = RequestUtils.data_fragment_url(study_file, 'metadata')
params_object = AnnDataIngestParameters.new(
subsample: true, ingest_anndata: false, extract: nil, obsm_keys: nil, name: cluster.name,
cluster_file:, cell_metadata_file:
)
job = IngestJob.new(
pipeline_name: SecureRandom.uuid, study:, study_file:, user: @user, action: :ingest_subsample, params_object:
)
mock = Minitest::Mock.new
2.times do
mock.expect :error, nil
Expand Down
41 changes: 41 additions & 0 deletions test/services/file_parse_service_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -205,4 +205,45 @@ class FileParseServiceTest < ActiveSupport::TestCase
end
end
end

test 'should extract raw counts from AnnData file after initial ingest' do
study = FactoryBot.create(:detached_study,
name_prefix: 'AnnData Raw Counts Test',
user: @user,
test_array: @@studies_to_clean)
study_file = FactoryBot.create(:ann_data_file,
name: 'data.h5ad',
reference_file: false,
study:,
cell_input: %w[A B C D],
annotation_input: [
{ name: 'disease', type: 'group', values: %w[cancer cancer normal normal] }
],
coordinate_input: [
{ tsne: { x: [1, 2, 3, 4], y: [5, 6, 7, 8] } }
],
expression_input: {
'phex' => [['A', 0.3], ['B', 1.0], ['C', 0.5], ['D', 0.1]]
})
assert study.expression_matrix_cells(study_file, matrix_type: 'raw').any?
# remove cells and re-run ingest to confirm job is launched
filename = "h5ad_frag.matrix.raw.mtx.gz"
query = {
name: "#{filename} Cells", cluster_name: filename, array_type: 'cells', linear_data_type: 'Study',
linear_data_id: study.id, study_file_id: study_file.id, cluster_group_id: nil, subsample_annotation: nil,
subsample_threshold: nil
}
DataArray.where(query).delete_all
study.reload
assert_empty study.expression_matrix_cells(study_file, matrix_type: 'raw')
job_mock = Minitest::Mock.new
job_mock.expect :push_remote_and_launch_ingest, nil
delay_mock = Minitest::Mock.new
delay_mock.expect :delay, job_mock
IngestJob.stub :new, delay_mock do
FileParseService.run_parse_job(study_file, study, study.user)
delay_mock.verify
job_mock.verify
end
end
end

0 comments on commit 3234c37

Please sign in to comment.