# Copyright (c) 2007, Mason Browne # # All rights reserved. # # Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: # # * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. # * Neither the name of the nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR # CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# Utilities for measuring how repeated words are spaced within a text.
module VZV
  # One pair of coordinates plus the distance between them. AdjacencyFinder
  # creates one node for every pair of positions holding the same word.
  class AdjacencyNode
    attr_reader :x, :y, :distance

    # x    - first coordinate (AdjacencyFinder passes the earlier word index).
    # y    - second coordinate (AdjacencyFinder passes the later word index).
    # dist - distance between the two coordinates.
    def initialize(x, y, dist)
      @x = x
      @y = y
      @distance = dist
    end
  end

  # Accumulates nodes (anything responding to x, y and distance) and reports
  # simple statistics over them.
  class NodeCollection
    # nodes - optional initial list of nodes; each is added in order.
    def initialize(nodes = [])
      @x_collection = []
      @y_collection = []
      @dist_collection = []
      @nodes = []
      nodes.each { |node| add_node(node) }
    end

    # Adds a node to the collection.
    def <<(val)
      add_node(val)
    end

    # Returns the mean x of all nodes as a Float, or nil when empty.
    def average_x
      mean(@x_collection)
    end

    # Returns the mean y of all nodes as a Float, or nil when empty.
    def average_y
      mean(@y_collection)
    end

    # Returns the mean distance of all nodes as a Float, or nil when empty.
    def average_distance
      mean(@dist_collection)
    end

    # Returns the node count divided by the mean distance, or nil when the
    # collection is empty.
    def frequency_to_average_distance
      mean_distance = average_distance
      frequency / mean_distance unless mean_distance.nil?
    end

    # Returns the number of nodes added so far.
    def frequency
      @nodes.length
    end

    # Same value as average_x.
    def average
      average_x
    end

    private

    # Records the node's coordinates, then stores the node itself.
    def add_node(val)
      record_node(val)
      @nodes << val
    end

    # Copies the node's x, y and distance into the per-attribute lists that
    # back the averages.
    def record_node(val)
      @x_collection << val.x
      # The y list must hold y values; it previously received val.x, which
      # made average_y report the average of x.
      @y_collection << val.y
      @dist_collection << val.distance
    end

    # Arithmetic mean of values as a Float, or nil for an empty list.
    def mean(values)
      return nil if values.empty?

      values.inject(0) { |total, value| total + value }.to_f / values.length
    end
  end

  # Splits a text into lower-cased words and records, for every word, each
  # pair of positions at which that word occurs.
  class AdjacencyFinder
    # Everything except ASCII letters, digits and whitespace is stripped
    # before the text is split into words.
    NON_WORD_CHARACTERS = /[^A-Za-z0-9\s]/.freeze

    # data_matrix - square Array of Arrays; entry [i][j] is (i - j).abs.
    # word_map    - Hash of word => NodeCollection, keyed in order of first
    #               occurrence. Words that occur once map to an empty
    #               collection.
    attr_reader :data_matrix, :word_map

    # text - the String to analyse (defaults to the empty string).
    def initialize(text = "")
      @data_array = text.gsub(NON_WORD_CHARACTERS, "").split(" ").map { |token| token.downcase }
      @data_matrix = build_matrix(@data_array)
      @word_map = build_word_map(@data_array, @data_matrix)
    end

    # Returns the word's average_x divided by the total number of words, or
    # nil when the word is unknown or has no recorded pairs (i.e. it occurs
    # only once).
    def average_positioning(word)
      collection = @word_map[word]
      return nil if collection.nil?

      mean_x = collection.average_x
      return nil if mean_x.nil?

      mean_x / @data_array.length
    end

    # Returns a Hash of word => NodeCollection restricted to words that have
    # at least one recorded pair, i.e. words that occur more than once.
    def duplicate_words
      @word_map.reject { |_word, collection| collection.frequency == 0 }
    end

    private

    # Builds the square matrix of absolute index differences for data_array.
    def build_matrix(data_array)
      size = data_array.length
      Array.new(size) { |row| Array.new(size) { |column| (row - column).abs } }
    end

    # Builds the word => NodeCollection map. For a word at positions
    # p0 < p1 < ..., one node is added per pair (pa, pb) with pa < pb, in
    # ascending order of pa and then pb.
    def build_word_map(data_array, data_matrix)
      # Group positions by word first so only matching positions are paired,
      # instead of comparing every word against every later word.
      positions_by_word = {}
      data_array.each_with_index do |word, index|
        (positions_by_word[word] ||= []) << index
      end

      word_map = {}
      positions_by_word.each do |word, positions|
        collection = NodeCollection.new
        positions.combination(2) do |earlier, later|
          collection << AdjacencyNode.new(earlier, later, data_matrix[earlier][later])
        end
        word_map[word] = collection
      end
      word_map
    end
  end
end