2018-02-12 13:53:34 +01:00

64 lines
1.8 KiB
Ruby

# frozen_string_literal: true
require 'algolia_html_extractor'
module Jekyll
  module Algolia
    # Module to extract records from Jekyll files
    module Extractor
      include Jekyll::Algolia

      # Public: Extract records from the file
      #
      # file - The Jekyll file to process
      #
      # Each raw record extracted from the file content has its :node key
      # removed, is merged with the file metadata (metadata values win on key
      # collisions), goes through Utils.compact_empty and is finally handed to
      # the user-defined hooks together with its node.
      #
      # Returns an Array of the records for which the hooks did not return nil
      def self.run(file)
        # Getting all hierarchical nodes from the HTML input
        raw_records = extract_raw_records(file.content)
        # Getting file metadata
        shared_metadata = FileBrowser.metadata(file)

        # Building the list of records
        records = raw_records.map do |record|
          # We do not need to pass the HTML node element to the final record,
          # but it is still handed to the hooks. Note that this removes the
          # key from the raw record itself.
          node = record.delete(:node)
          # Merging each record info with file info
          record = Utils.compact_empty(record.merge(shared_metadata))
          # Apply custom user-defined hooks
          # Users can return `nil` from the hook to signal we should not index
          # such a record
          Hooks.apply_each(record, node)
        end

        # Only `nil` results are discarded; any other hook return value is kept
        records.compact
      end

      # Public: Adds a unique :objectID field to the hash, representing the
      # current content of the record
      #
      # record - The Hash to update. It is modified in place.
      #
      # The id is computed from the record as it is before :objectID is set.
      #
      # Returns the same record, with its :objectID key set
      def self.add_unique_object_id(record)
        record[:objectID] = AlgoliaHTMLExtractor.uuid(record)
        record
      end

      # Public: Extract raw records from the file, including content for each
      # node to index and hierarchy
      #
      # content - The HTML content to parse
      #
      # The CSS selector of the nodes to extract is read from the
      # `nodes_to_index` Algolia configuration option.
      #
      # Returns whatever AlgoliaHTMLExtractor.run returns for this content
      def self.extract_raw_records(content)
        AlgoliaHTMLExtractor.run(
          content,
          options: {
            css_selector: Configurator.algolia('nodes_to_index')
          }
        )
      end
    end
  end
end