From 19b66992284a13b12d3c5bc8371ca910937f6b63 Mon Sep 17 00:00:00 2001 From: Vladimir Kochnev Date: Wed, 3 Aug 2016 12:09:19 +0300 Subject: [PATCH 1/2] Fix Relationship.instances cache. This PR aims to fix several issues with Relationship cache: 1) It's not threadsafe, so I propose to use a TLS variable for this. 2) Memory obtained by cache remains non-freed before the next run of `serialize`. I think it should be freed immediately. 3) Memory should be freed in `ensure` block to prevent memory bloating in case of exception. *There are only two hard things in Computer Science: cache invalidation and naming things.* --- lib/axlsx/package.rb | 8 ++++++-- lib/axlsx/rels/relationship.rb | 18 +++++++++++++----- test/rels/tc_relationship.rb | 8 ++++++++ test/tc_package.rb | 2 ++ 4 files changed, 29 insertions(+), 7 deletions(-) diff --git a/lib/axlsx/package.rb b/lib/axlsx/package.rb index da661b34..2e9e14fc 100644 --- a/lib/axlsx/package.rb +++ b/lib/axlsx/package.rb @@ -100,11 +100,13 @@ def workbook=(workbook) DataTypeValidator.validate :Package_workbook, Workbook, # File.open('example_streamed.xlsx', 'w') { |f| f.write(s.read) } def serialize(output, confirm_valid=false) return false unless !confirm_valid || self.validate.empty? - Relationship.clear_cached_instances + Relationship.initialize_cached_instances Zip::OutputStream.open(output) do |zip| write_parts(zip) end true + ensure + Relationship.clear_cached_instances end @@ -113,11 +115,13 @@ def serialize(output, confirm_valid=false) # @return [StringIO|Boolean] False if confirm_valid and validation errors exist. rewound string IO if not. def to_stream(confirm_valid=false) return false unless !confirm_valid || self.validate.empty? - Relationship.clear_cached_instances + Relationship.initialize_cached_instances zip = write_parts(Zip::OutputStream.new(StringIO.new, true)) stream = zip.close_buffer stream.rewind stream + ensure + Relationship.clear_cached_instances end # Encrypt the package into a CFB using the password provided diff --git a/lib/axlsx/rels/relationship.rb b/lib/axlsx/rels/relationship.rb index 99dad367..66f53b2a 100644 --- a/lib/axlsx/rels/relationship.rb +++ b/lib/axlsx/rels/relationship.rb @@ -8,20 +8,28 @@ class << self # Keeps track of all instances of this class. # @return [Array] def instances - @instances ||= [] + Thread.current[:axlsx_relationship_cached_instances] ||= [] end - - # Clear cached instances. + + # Initialize cached instances. # # This should be called before serializing a package (see {Package#serialize} and # {Package#to_stream}) to make sure that serialization is idempotent (i.e. # Relationship instances are generated with the same IDs everytime the package # is serialized). + def initialize_cached_instances + Thread.current[:axlsx_relationship_cached_instances] = [] + end + + # Clear cached instances. + # + # This should be called after serializing a package (see {Package#serialize} and + # {Package#to_stream}) to free the memory allocated for cache. # # Also, calling this avoids memory leaks (cached instances lingering around # forever). def clear_cached_instances - @instances = [] + Thread.current[:axlsx_relationship_cached_instances] = nil end # Generate and return a unique id (eg. `rId123`) Used for setting {#Id}. @@ -30,7 +38,7 @@ def clear_cached_instances # {clear_cached_instances} will automatically reset the generated ids, too. # @return [String] def next_free_id - "rId#{@instances.size + 1}" + "rId#{instances.size + 1}" end end diff --git a/test/rels/tc_relationship.rb b/test/rels/tc_relationship.rb index add1654f..845f789e 100644 --- a/test/rels/tc_relationship.rb +++ b/test/rels/tc_relationship.rb @@ -13,6 +13,14 @@ def test_instances_with_same_attributes_share_id instance = Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, 'target') assert_equal instance.Id, Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, 'target').Id end + + def test_instances_cache_is_thread_safe + cache1, cache2 = nil + t1 = Thread.new { cache1 = Axlsx::Relationship.instances } + t2 = Thread.new { cache2 = Axlsx::Relationship.instances } + [t1, t2].each(&:join) + assert_not_same(cache1, cache2) + end def test_target_is_only_considered_for_same_attributes_check_if_target_mode_is_external source_obj = Object.new diff --git a/test/tc_package.rb b/test/tc_package.rb index d89f35b0..4bd0d429 100644 --- a/test/tc_package.rb +++ b/test/tc_package.rb @@ -236,6 +236,8 @@ def test_to_stream # this is just a roundabout guess for a package as it is build now # in testing. assert(stream.size > 80000) + # Cached instances should be cleared + assert(Axlsx::Relationship.instances.empty?) end def test_encrypt From c6dc227f37a2ccdc7314521348e877ef6973d047 Mon Sep 17 00:00:00 2001 From: Vladimir Kochnev Date: Sat, 9 Mar 2019 19:49:27 +0300 Subject: [PATCH 2/2] Cache relationship in Hash rather than in Array. Also cacle only ids, not entire instances. --- lib/axlsx/package.rb | 8 +++--- lib/axlsx/rels/relationship.rb | 47 +++++++++++++++------------------- test/rels/tc_relationship.rb | 6 ++--- test/tc_package.rb | 4 +-- 4 files changed, 29 insertions(+), 36 deletions(-) diff --git a/lib/axlsx/package.rb b/lib/axlsx/package.rb index 2e9e14fc..56410339 100644 --- a/lib/axlsx/package.rb +++ b/lib/axlsx/package.rb @@ -100,13 +100,13 @@ def workbook=(workbook) DataTypeValidator.validate :Package_workbook, Workbook, # File.open('example_streamed.xlsx', 'w') { |f| f.write(s.read) } def serialize(output, confirm_valid=false) return false unless !confirm_valid || self.validate.empty? - Relationship.initialize_cached_instances + Relationship.initialize_ids_cache Zip::OutputStream.open(output) do |zip| write_parts(zip) end true ensure - Relationship.clear_cached_instances + Relationship.clear_ids_cache end @@ -115,13 +115,13 @@ def serialize(output, confirm_valid=false) # @return [StringIO|Boolean] False if confirm_valid and validation errors exist. rewound string IO if not. def to_stream(confirm_valid=false) return false unless !confirm_valid || self.validate.empty? - Relationship.initialize_cached_instances + Relationship.initialize_ids_cache zip = write_parts(Zip::OutputStream.new(StringIO.new, true)) stream = zip.close_buffer stream.rewind stream ensure - Relationship.clear_cached_instances + Relationship.clear_ids_cache end # Encrypt the package into a CFB using the password provided diff --git a/lib/axlsx/rels/relationship.rb b/lib/axlsx/rels/relationship.rb index 66f53b2a..ad47c9a3 100644 --- a/lib/axlsx/rels/relationship.rb +++ b/lib/axlsx/rels/relationship.rb @@ -5,40 +5,40 @@ module Axlsx class Relationship class << self - # Keeps track of all instances of this class. + # Keeps track of relationship ids in use. # @return [Array] - def instances - Thread.current[:axlsx_relationship_cached_instances] ||= [] + def ids_cache + Thread.current[:axlsx_relationship_ids_cache] ||= {} end - # Initialize cached instances. + # Initialize cached ids. # # This should be called before serializing a package (see {Package#serialize} and # {Package#to_stream}) to make sure that serialization is idempotent (i.e. # Relationship instances are generated with the same IDs everytime the package # is serialized). - def initialize_cached_instances - Thread.current[:axlsx_relationship_cached_instances] = [] + def initialize_ids_cache + Thread.current[:axlsx_relationship_ids_cache] = {} end - # Clear cached instances. + # Clear cached ids. # # This should be called after serializing a package (see {Package#serialize} and # {Package#to_stream}) to free the memory allocated for cache. # - # Also, calling this avoids memory leaks (cached instances lingering around + # Also, calling this avoids memory leaks (cached ids lingering around # forever). - def clear_cached_instances - Thread.current[:axlsx_relationship_cached_instances] = nil + def clear_ids_cache + Thread.current[:axlsx_relationship_ids_cache] = nil end # Generate and return a unique id (eg. `rId123`) Used for setting {#Id}. # - # The generated id depends on the number of cached instances, so using - # {clear_cached_instances} will automatically reset the generated ids, too. + # The generated id depends on the number of previously cached ids, so using + # {clear_ids_cache} will automatically reset the generated ids, too. # @return [String] def next_free_id - "rId#{instances.size + 1}" + "rId#{ids_cache.size + 1}" end end @@ -88,12 +88,7 @@ def initialize(source_obj, type, target, options={}) self.Target=target self.Type=type self.TargetMode = options[:target_mode] if options[:target_mode] - @Id = if (existing = self.class.instances.find{ |i| should_use_same_id_as?(i) }) - existing.Id - else - self.class.next_free_id - end - self.class.instances << self + @Id = (self.class.ids_cache[ids_cache_key] ||= self.class.next_free_id) end # @see Target @@ -114,7 +109,7 @@ def to_xml_string(str = '') str << '/>' end - # Whether this relationship should use the same id as `other`. + # A key that determines whether this relationship should use already generated id. # # Instances designating the same relationship need to use the same id. We can not simply # compare the {#Target} attribute, though: `foo/bar.xml`, `../foo/bar.xml`, @@ -124,13 +119,11 @@ def to_xml_string(str = '') # then {#Target} will be an absolute URL and thus can safely be compared). # # @todo Implement comparison of {#Target} based on normalized path names. - # @param other [Relationship] - def should_use_same_id_as?(other) - result = self.source_obj == other.source_obj && self.Type == other.Type && self.TargetMode == other.TargetMode - if self.TargetMode == :External - result &&= self.Target == other.Target - end - result + # @return [Array] + def ids_cache_key + key = [source_obj, self.Type, self.TargetMode] + key << self.Target if self.TargetMode == :External + key end end diff --git a/test/rels/tc_relationship.rb b/test/rels/tc_relationship.rb index 845f789e..f47b6a4d 100644 --- a/test/rels/tc_relationship.rb +++ b/test/rels/tc_relationship.rb @@ -14,10 +14,10 @@ def test_instances_with_same_attributes_share_id assert_equal instance.Id, Axlsx::Relationship.new(source_obj, Axlsx::WORKSHEET_R, 'target').Id end - def test_instances_cache_is_thread_safe + def test_ids_cache_is_thread_safe cache1, cache2 = nil - t1 = Thread.new { cache1 = Axlsx::Relationship.instances } - t2 = Thread.new { cache2 = Axlsx::Relationship.instances } + t1 = Thread.new { cache1 = Axlsx::Relationship.ids_cache } + t2 = Thread.new { cache2 = Axlsx::Relationship.ids_cache } [t1, t2].each(&:join) assert_not_same(cache1, cache2) end diff --git a/test/tc_package.rb b/test/tc_package.rb index 4bd0d429..1e5ce322 100644 --- a/test/tc_package.rb +++ b/test/tc_package.rb @@ -236,8 +236,8 @@ def test_to_stream # this is just a roundabout guess for a package as it is build now # in testing. assert(stream.size > 80000) - # Cached instances should be cleared - assert(Axlsx::Relationship.instances.empty?) + # Cached ids should be cleared + assert(Axlsx::Relationship.ids_cache.empty?) end def test_encrypt