fix(encoding): Use raw_html and text
This commit is contained in:
parent
36ea0179b3
commit
3430a28588
83
lib/push.rb
83
lib/push.rb
@ -42,6 +42,10 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
true
|
true
|
||||||
end
|
end
|
||||||
|
|
||||||
|
def excluded_file?(file)
|
||||||
|
@config['algolia']['excluded_files'].include?(file.name)
|
||||||
|
end
|
||||||
|
|
||||||
def check_credentials(api_key, application_id, index_name)
|
def check_credentials(api_key, application_id, index_name)
|
||||||
unless api_key
|
unless api_key
|
||||||
Jekyll.logger.error 'Algolia Error: No API key defined'
|
Jekyll.logger.error 'Algolia Error: No API key defined'
|
||||||
@ -87,8 +91,19 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
default_settings = {
|
default_settings = {
|
||||||
attributeForDistinct: 'parent_id',
|
attributeForDistinct: 'parent_id',
|
||||||
attributesForFaceting: %w(tags type),
|
attributesForFaceting: %w(tags type),
|
||||||
attributesToIndex: %w(title h1 h2 h3 h4 h5 h6 content tags),
|
attributesToIndex: %w(
|
||||||
attributesToRetrieve: %w(title posted_at content url css_selector),
|
title h1 h2 h3 h4 h5 h6
|
||||||
|
unordered(text)
|
||||||
|
unordered(tags)
|
||||||
|
),
|
||||||
|
attributesToRetrieve: %w(
|
||||||
|
title h1 h2 h3 h4 h5 h6
|
||||||
|
posted_at
|
||||||
|
content
|
||||||
|
text
|
||||||
|
url
|
||||||
|
css_selector
|
||||||
|
),
|
||||||
customRanking: ['desc(posted_at)', 'desc(title_weight)'],
|
customRanking: ['desc(posted_at)', 'desc(title_weight)'],
|
||||||
distinct: true,
|
distinct: true,
|
||||||
highlightPreTag: '<span class="algolia__result-highlight">',
|
highlightPreTag: '<span class="algolia__result-highlight">',
|
||||||
@ -103,32 +118,6 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
index.set_settings(settings)
|
index.set_settings(settings)
|
||||||
end
|
end
|
||||||
|
|
||||||
def push(items)
|
|
||||||
api_key = AlgoliaSearchJekyll.api_key
|
|
||||||
application_id = @config['algolia']['application_id']
|
|
||||||
index_name = @config['algolia']['index_name']
|
|
||||||
check_credentials(api_key, application_id, index_name)
|
|
||||||
|
|
||||||
Algolia.init(application_id: application_id, api_key: api_key)
|
|
||||||
index = Algolia::Index.new(index_name)
|
|
||||||
configure_index(index)
|
|
||||||
index.clear_index
|
|
||||||
|
|
||||||
items.each_slice(1000) do |batch|
|
|
||||||
Jekyll.logger.info "Indexing #{batch.size} items"
|
|
||||||
begin
|
|
||||||
index.add_objects(batch)
|
|
||||||
rescue StandardError => error
|
|
||||||
Jekyll.logger.error 'Algolia Error: HTTP Error'
|
|
||||||
Jekyll.logger.warn error.message
|
|
||||||
exit 1
|
|
||||||
end
|
|
||||||
end
|
|
||||||
|
|
||||||
Jekyll.logger.info "Indexing of #{items.size} items " \
|
|
||||||
"in #{index_name} done."
|
|
||||||
end
|
|
||||||
|
|
||||||
def get_items_from_file(file)
|
def get_items_from_file(file)
|
||||||
is_page = file.is_a?(Jekyll::Page)
|
is_page = file.is_a?(Jekyll::Page)
|
||||||
is_post = file.is_a?(Jekyll::Post)
|
is_post = file.is_a?(Jekyll::Post)
|
||||||
@ -136,6 +125,7 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
# We only index posts, and markdown pages
|
# We only index posts, and markdown pages
|
||||||
return nil unless is_page || is_post
|
return nil unless is_page || is_post
|
||||||
return nil if is_page && !parseable?(file)
|
return nil if is_page && !parseable?(file)
|
||||||
|
return nil if excluded_file?(file)
|
||||||
|
|
||||||
html = file.content.gsub("\n", ' ')
|
html = file.content.gsub("\n", ' ')
|
||||||
|
|
||||||
@ -171,6 +161,7 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
tags.map! { |tag| tag.to_s.gsub(',', '') }
|
tags.map! { |tag| tag.to_s.gsub(',', '') }
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Get the list of headings (h1, h2, etc) above the specified node
|
||||||
def get_previous_hx(node, memo = { level: 7 })
|
def get_previous_hx(node, memo = { level: 7 })
|
||||||
previous = node.previous_sibling
|
previous = node.previous_sibling
|
||||||
# Stop if no previous element
|
# Stop if no previous element
|
||||||
@ -195,7 +186,7 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
memo[:level] = title_level
|
memo[:level] = title_level
|
||||||
|
|
||||||
# Add to the memo and continue
|
# Add to the memo and continue
|
||||||
memo[tag_name.to_sym] = previous.text
|
memo[tag_name.to_sym] = previous.content
|
||||||
get_previous_hx(previous, memo)
|
get_previous_hx(previous, memo)
|
||||||
end
|
end
|
||||||
|
|
||||||
@ -220,17 +211,47 @@ class AlgoliaSearchJekyllPush < Jekyll::Command
|
|||||||
node.css_path.gsub('html > body > ', '')
|
node.css_path.gsub('html > body > ', '')
|
||||||
end
|
end
|
||||||
|
|
||||||
|
# Get a list of items representing the different paragraphs
|
||||||
def get_paragraphs_from_html(html, base_data)
|
def get_paragraphs_from_html(html, base_data)
|
||||||
doc = Nokogiri::HTML(html)
|
doc = Nokogiri::HTML(html)
|
||||||
doc.css('p').map.with_index do |p, index|
|
paragraphs = doc.css('p').map.with_index do |p, index|
|
||||||
|
next unless p.text.size > 0
|
||||||
new_item = base_data.clone
|
new_item = base_data.clone
|
||||||
new_item.merge!(get_previous_hx(p))
|
new_item.merge!(get_previous_hx(p))
|
||||||
new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
|
new_item[:objectID] = "#{new_item[:parent_id]}_#{index}"
|
||||||
new_item[:css_selector] = get_css_selector(p)
|
new_item[:css_selector] = get_css_selector(p)
|
||||||
new_item[:content] = p.to_s
|
new_item[:raw_html] = p.to_s
|
||||||
|
new_item[:text] = p.content
|
||||||
new_item[:title_weight] = get_title_weight(p.text, new_item)
|
new_item[:title_weight] = get_title_weight(p.text, new_item)
|
||||||
new_item
|
new_item
|
||||||
end
|
end
|
||||||
|
paragraphs.compact
|
||||||
|
end
|
||||||
|
|
||||||
|
def push(items)
|
||||||
|
api_key = AlgoliaSearchJekyll.api_key
|
||||||
|
application_id = @config['algolia']['application_id']
|
||||||
|
index_name = @config['algolia']['index_name']
|
||||||
|
check_credentials(api_key, application_id, index_name)
|
||||||
|
|
||||||
|
Algolia.init(application_id: application_id, api_key: api_key)
|
||||||
|
index = Algolia::Index.new(index_name)
|
||||||
|
configure_index(index)
|
||||||
|
index.clear_index
|
||||||
|
|
||||||
|
items.each_slice(1000) do |batch|
|
||||||
|
Jekyll.logger.info "Indexing #{batch.size} items"
|
||||||
|
begin
|
||||||
|
index.add_objects(batch)
|
||||||
|
rescue StandardError => error
|
||||||
|
Jekyll.logger.error 'Algolia Error: HTTP Error'
|
||||||
|
Jekyll.logger.warn error.message
|
||||||
|
exit 1
|
||||||
|
end
|
||||||
|
end
|
||||||
|
|
||||||
|
Jekyll.logger.info "Indexing of #{items.size} items " \
|
||||||
|
"in #{index_name} done."
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user