module Oga
  ##
  # Thread-safe, size-bounded cache using a Hash as the underlying storage
  # engine. Whenever the size of the cache exceeds the given limit the oldest
  # keys are removed (based on insert order). Reading a key does not change its
  # position; writing it again moves it to the end of the list.
  #
  # This class keeps its own ordered list of keys instead of calling
  # `Hash#keys` internally, as the latter allocates a new Array upon every
  # call.
  #
  # This class doesn't use MonitorMixin due to the extra overhead it adds
  # compared to using a Mutex directly.
  #
  # Example usage:
  #
  #     cache = LRU.new(3)
  #
  #     cache[:a] = 10
  #     cache[:b] = 20
  #     cache[:c] = 30
  #     cache[:d] = 40
  #
  #     cache.keys # => [:b, :c, :d]
  #
  class LRU
    ##
    # @param [Fixnum] maximum The maximum number of keys to retain.
    #
    def initialize(maximum = 1024)
      @maximum = maximum
      @cache   = {}
      @keys    = []
      @mutex   = Mutex.new

      # No thread holds the lock yet. This is only ever set to the thread that
      # currently holds @mutex (see #synchronize).
      @owner = nil
    end

    ##
    # Returns the value of the key.
    #
    # @param [Mixed] key
    # @return [Mixed] The stored value, or `nil` when the key is not present.
    #
    def [](key)
      synchronize { @cache[key] }
    end

    ##
    # Sets the key and its value. Old keys are discarded if the cache size
    # exceeds the limit. Setting an existing key moves it to the end of the
    # list of keys.
    #
    # @param [Mixed] key
    # @param [Mixed] value
    #
    def []=(key, value)
      synchronize do
        # Only scan the key list when the key is known to be present; the Hash
        # lookup is O(1) whereas Array#delete is O(n).
        @keys.delete(key) if @cache.key?(key)

        @cache[key] = value
        @keys << key

        resize
      end
    end

    ##
    # Returns the value of a key if the key exists, otherwise yields the
    # supplied block and stores its return value under the key.
    #
    # Existence is checked using the key itself, thus cached `nil` and `false`
    # values are returned as-is instead of re-running the block.
    #
    # @param [Mixed] key
    # @yieldreturn [Mixed] The value to store when the key is missing.
    # @return [Mixed]
    #
    def get_or_set(key)
      synchronize do
        if @cache.key?(key)
          @cache[key]
        else
          self[key] = yield
        end
      end
    end

    ##
    # Returns the keys of the cache, ordered from oldest to newest.
    #
    # A copy is returned so callers can neither observe nor cause changes to
    # the internal list outside of the lock.
    #
    # @return [Array]
    #
    def keys
      synchronize { @keys.dup }
    end

    ##
    # @param [Mixed] key
    # @return [TrueClass|FalseClass]
    #
    def key?(key)
      synchronize { @cache.key?(key) }
    end

    ##
    # Removes all keys from the cache.
    #
    def clear
      synchronize do
        @keys.clear
        @cache.clear
      end
    end

    ##
    # @return [Fixnum]
    #
    def size
      synchronize { @cache.size }
    end

    alias_method :length, :size

    private

    ##
    # Yields the supplied block while holding the lock. Calls made by the
    # thread that already holds the lock (e.g. `#[]=` called from within
    # `#get_or_set`) yield directly, making the lock re-entrant. This method is
    # heavily based on `MonitorMixin#mon_enter`.
    #
    # The owner is reset before the lock is released, even when the block
    # raises. Without this a thread that previously held the lock would skip
    # the mutex on its next call.
    #
    def synchronize
      if @owner == Thread.current
        yield
      else
        @mutex.synchronize do
          @owner = Thread.current

          begin
            yield
          ensure
            @owner = nil
          end
        end
      end
    end

    ##
    # Removes the oldest keys until the size of the hash no longer exceeds the
    # maximum size. Must be called while holding the lock.
    #
    def resize
      overflow = @cache.size - @maximum

      return unless overflow > 0

      @keys.shift(overflow).each { |key| @cache.delete(key) }
    end
  end # LRU
end # Oga
+ cache[:a] = 10 + cache[:b] = 20 + cache[:a] = 30 + + cache.keys.should == [:b, :a] + end + + describe 'using multiple threads' do + it 'supports concurrent writes' do + cache = described_class.new + numbers = 1..10 + + each_in_parallel(numbers) do |number| + cache[number] = number + end + + numbers.each do |number| + cache[number].should == number + end + end + + it 'supports concurrent resizes' do + cache = described_class.new(5) + numbers = 1..10 + + each_in_parallel(numbers) do |number| + cache[number] = number + end + + cache.size.should == 5 + end + end + end + + describe '#get_or_set' do + it 'sets a non existing key' do + cache = described_class.new + + cache.get_or_set(:a) { 10 }.should == 10 + end + + it 'returns the value of an existing key' do + cache = described_class.new + + cache[:a] = 10 + + cache.get_or_set(:a) { 20 }.should == 10 + end + + describe 'using multiple threads' do + it 'only sets a key once' do + cache = described_class.new + + cache.should_receive(:[]=).once.and_call_original + + each_in_parallel([1, 1, 1]) do |number| + cache.get_or_set(number) { number } + end + end + end + end + + describe '#keys' do + it 'returns the keys of the cache' do + cache = described_class.new + + cache[:a] = 10 + cache[:b] = 20 + + cache.keys.should == [:a, :b] + end + + it 'returns the keys without any duplicates' do + cache = described_class.new + + cache[:a] = 10 + cache[:a] = 20 + + cache.keys.should == [:a] + end + end + + describe '#key?' 
module Oga
  module ThreadingHelpers
    ##
    # Iterates over the enumerable using a separate thread for every value.
    # This method waits for all threads to complete before returning.
    #
    # Every value is handed to its thread as an argument of `Thread.new`, so
    # each thread works with its own local reference to the value.
    #
    # @example
    #  each_in_parallel([10, 20]) do |value|
    #    puts value
    #  end
    #
    # @param [Enumerable] enumerable
    # @yieldparam [Mixed] value The value to process in its own thread.
    # @return [Array<Thread>] The (finished) threads, one per value.
    #
    def each_in_parallel(enumerable)
      threads = enumerable.map do |value|
        Thread.new(value) { |item| yield item }
      end

      threads.each(&:join)
    end
  end # ThreadingHelpers
end # Oga