Adding tests for extractor run

This commit is contained in:
Pixelastic 2017-11-10 19:31:52 +01:00
parent 7ef8296e6f
commit a437f15a24
10 changed files with 208 additions and 510 deletions

View File

@ -25,7 +25,7 @@ module Jekyll
record.delete(:node)
# Merging each record info with file info
record = record.merge(shared_metadata).compact
record = Utils.compact_empty(record.merge(shared_metadata))
# Apply custom user-defined hooks
record = Jekyll::Algolia.hook_before_indexing_each(record, node)
@ -53,10 +53,6 @@ module Jekyll
}
).extract
end
# Returns the raw record it is given, untouched (no conversion is performed
# here).
#
# raw_record - the record to "convert"
#
# NOTE(review): the name suggests a JSON conversion was planned; as written
# this is an identity function.
def self.convert_to_json(raw_record)
raw_record
end
end
end
end

View File

@ -2,39 +2,168 @@
require 'spec_helper'
describe(Jekyll::Algolia::Extractor) do
let(:configurator) { Jekyll::Algolia::Configurator }
let(:filebrowser) { Jekyll::Algolia::FileBrowser }
let(:current) { Jekyll::Algolia::Extractor }
let(:site) { init_new_jekyll_site }
# Suppress Jekyll log about reading the config file
before do
allow(Jekyll.logger).to receive(:info)
end
describe '.extract_raw_records' do
# Given
let(:content) { 'some html markup' }
let(:html_extractor) { double('AlgoliaHTMLExtractor', extract: nil) }
let(:configurator) { Jekyll::Algolia::Configurator }
before do
allow(AlgoliaHTMLExtractor)
.to receive(:new)
.and_return(html_extractor)
end
before do
allow(configurator)
.to receive(:algolia)
.with('nodes_to_index')
.and_return('foo')
# Checks how .extract_raw_records drives AlgoliaHTMLExtractor, with both the
# extractor class and the `nodes_to_index` configuration lookup stubbed out.
context 'with mock data' do
  # Given
  let(:content) { 'some html markup' }
  let(:html_extractor) { double('AlgoliaHTMLExtractor', extract: nil) }

  before do
    # Hand back our double instead of instantiating a real extractor
    allow(AlgoliaHTMLExtractor)
      .to receive(:new)
      .and_return(html_extractor)
    # Make the `nodes_to_index` lookup answer 'foo'
    allow(configurator)
      .to receive(:algolia)
      .with('nodes_to_index')
      .and_return('foo')
  end

  # When
  before { current.extract_raw_records(content) }

  # Then
  it 'should create a new AlgoliaHTMLExtractor with the content passed' do
    expect(AlgoliaHTMLExtractor)
      .to have_received(:new)
      .with(content, anything)
  end

  it 'should configure the extractor with the nodes_to_index config value' do
    expect(AlgoliaHTMLExtractor)
      .to have_received(:new)
      .with(anything, options: { css_selector: 'foo' })
  end

  it { expect(html_extractor).to have_received(:extract) }
end
# When
before { current.extract_raw_records(content) }
context 'with real data' do
let(:site) { init_new_jekyll_site }
subject { current.extract_raw_records(content) }
# Then
it 'should create a new AlgoliaHTMLExtractor with the content passed' do
expect(AlgoliaHTMLExtractor)
.to have_received(:new)
.with(content, anything)
context 'with a page' do
let(:content) { site.__find_file('only-paragraphs.md').content }
it { expect(subject.length).to eq 6 }
end
context 'with a page with divs' do
let(:content) { site.__find_file('only-divs.md').content }
before do
allow(configurator)
.to receive(:algolia)
.with('nodes_to_index')
.and_return('div')
end
it { expect(subject.length).to eq 5 }
end
end
it 'should configure the extractor with the nodex_to_index config value' do
expect(AlgoliaHTMLExtractor)
.to have_received(:new)
.with(anything, options: { css_selector: 'foo' })
end
describe '.run' do
subject { current.run(file) }
# Exercises `current.run(file)` with its collaborators stubbed: both hooks,
# `extract_raw_records` and `filebrowser.metadata`. The nested groups override
# the `raw_records` / `metadata` lets (and, further down, the hook stubs) to
# set up each scenario.
context 'with mock data' do
# let! is evaluated eagerly for every example instead of on first use
let!(:file) { site.__find_file('html.html') }
before do
# Default hook stubs are pass-throughs: they return their first argument
allow(Jekyll::Algolia)
.to receive(:hook_before_indexing_all)
.with(anything) { |input| input }
allow(Jekyll::Algolia)
.to receive(:hook_before_indexing_each)
.with(anything, anything) { |input| input }
# The extraction and the file metadata come from the lets below
allow(current)
.to receive(:extract_raw_records)
.and_return(raw_records)
allow(filebrowser)
.to receive(:metadata)
.and_return(metadata)
end
# Defaults, overridden by the nested groups that need specific data
let(:raw_records) { [{}] }
let(:metadata) { {} }
describe 'should have one record per element extracted' do
let(:raw_records) { [{ foo: 'bar' }, { baz: 'foo' }] }
it { expect(subject.length).to eq 2 }
end
describe 'should all have the same common shared data' do
# NOTE(review): the first raw record already carries foo: 'bar', so only
# the second assertion depends on the metadata being merged in.
let(:raw_records) { [{ foo: 'bar' }, { baz: 'foo' }] }
let(:metadata) { { foo: 'bar' } }
it { expect(subject[0]).to include(foo: 'bar') }
it { expect(subject[1]).to include(foo: 'bar') }
end
describe 'should not contain a :node key' do
let(:raw_records) { [{ foo: 'bar', node: 'nope' }] }
it { expect(subject[0]).to_not include(:node) }
end
describe 'should not contain any nil or empty array keys' do
# nil / [] values come from both the raw record and the metadata
let(:raw_records) { [{ foo: nil, bar: [] }] }
let(:metadata) { { baz: nil, yep: 'yep' } }
it do
expect(subject[0]).to_not include(:foo)
expect(subject[0]).to_not include(:bar)
expect(subject[0]).to_not include(:baz)
expect(subject[0]).to include(yep: 'yep')
end
end
context 'with a custom hook on each' do
# Re-stubs the per-record hook set up in the outer `before`, so that it
# returns `hook_each_value` whatever it is called with
before do
allow(Jekyll::Algolia)
.to receive(:hook_before_indexing_each)
.and_return(hook_each_value)
end
context 'when updating the value' do
let(:metadata) { { foo: 'bar' } }
let(:hook_each_value) { { new_foo: 'new_bar' } }
it { expect(subject[0]).to include(new_foo: 'new_bar') }
end
context 'when returning nil from the hook' do
let(:hook_each_value) { nil }
# The result is expected to be empty when the hook returns nil
it { should be_empty }
end
end
context 'with a custom hook on all' do
# Re-stubs the global hook: whatever it returns is expected to be the
# final result of the run
before do
allow(Jekyll::Algolia)
.to receive(:hook_before_indexing_all)
.and_return(%w[foo bar baz])
end
it { should eq %w[foo bar baz] }
end
end
# Runs the extractor on the real `about.md` fixture, without any stubbing, and
# checks the `added_through_each` flag on the first three records and the
# extra trailing record (presumably contributed by the fixture site's custom
# hooks; verify against the fixture).
context 'with real data' do
  let(:file) { site.__find_file('about.md') }

  describe 'should add a new key to each record' do
    it do
      [0, 1, 2].each do |position|
        expect(subject[position]).to include(added_through_each: true)
      end
    end
  end

  describe 'should add new record at the end' do
    it { expect(subject[-1]).to eq(name: 'Last one') }
  end
end
it { expect(html_extractor).to have_received(:extract) }
end
end

View File

@ -183,8 +183,8 @@ describe(Jekyll::Algolia::FileBrowser) do
it { should eq '/about.html' }
end
context 'with a post' do
let(:file) { site.__find_file('_posts/2015-07-02-test-post.md') }
it { should eq '/2015/07/02/test-post.html' }
let(:file) { site.__find_file('-test-post-again.md') }
it { should eq '/2015/07/03/test-post-again.html' }
end
context 'with a collection element' do
let(:file) { site.__find_file('_my-collection/collection-item.html') }
@ -295,25 +295,32 @@ describe(Jekyll::Algolia::FileBrowser) do
context 'with a page' do
let(:file) { site.__find_file('about.md') }
it { should include(title: 'About') }
it { should include(custom1: 'foo') }
it { should include(custom2: 'bar') }
it do
should include(title: 'About')
should include(custom1: 'foo')
should include(custom2: 'bar')
should include(customList: %w[foo bar])
end
end
context 'with a post' do
let(:file) { site.__find_file('-test-post-again.md') }
it { should include(title: 'Test post again') }
it { should include(categories: %w[foo bar]) }
it { should include(tags: %w[foo bar]) }
it { should include(draft: false) }
it { should include(ext: '.md') }
let(:file) { site.__find_file('-test-post.md') }
it do
should include(title: 'Test post')
should include(categories: %w[foo bar])
should include(tags: ['tag', 'another tag'])
should include(draft: false)
should include(ext: '.md')
end
end
context 'with a collection item' do
let(:file) { site.__find_file('collection-item.html') }
it { should include(title: 'Collection Item') }
it { should include(categories: []) }
it { should include(tags: []) }
it { should include(draft: false) }
it { should include(ext: '.html') }
it do
should include(title: 'Collection Item')
should include(categories: [])
should include(tags: [])
should include(draft: false)
should include(ext: '.html')
end
end
describe 'should not have modified the inner data' do
@ -323,10 +330,12 @@ describe(Jekyll::Algolia::FileBrowser) do
end
describe 'should not contain keys where we have defined getters' do
let(:file) { site.__find_file('html.html') }
it { should_not include(:slug) }
it { should_not include(:type) }
it { should_not include(:url) }
it { should_not include(:date) }
it do
should_not include(:slug)
should_not include(:type)
should_not include(:url)
should_not include(:date)
end
end
describe 'should not contain some specific keys' do
let(:file) { site.__find_file('html.html') }
@ -385,6 +394,7 @@ describe(Jekyll::Algolia::FileBrowser) do
context 'with real data' do
context 'with a page' do
let(:file) { site.__find_file('about.md') }
it { should include(author: 'Myself') }
it { should_not include(:collection) }
it { should_not include(:date) }
it { should include(slug: 'about') }
@ -398,13 +408,14 @@ describe(Jekyll::Algolia::FileBrowser) do
context 'with a post' do
let(:file) { site.__find_file('-test-post.md') }
it { should_not include(:collection) }
it { should include(categories: %w[foo bar]) }
it { should include(date: 1_435_788_000) }
it { should include(ext: '.md') }
it { should include(slug: 'test-post') }
it { should include(tags: ['tag', 'another tag']) }
it { should include(type: 'post') }
it { should include(title: 'Test post') }
it { should include(url: '/2015/07/02/test-post.html') }
it { should include(url: '/foo/bar/2015/07/02/test-post.html') }
end
context 'with a collection document' do
let(:file) { site.__find_file('collection-item.html') }

View File

@ -4,7 +4,9 @@ date: 04-05-1984
categories:
- foo
- bar
tags: foo bar
tags:
- foo
- bar
---
The grandest of omelettes. Those that feast on dragon eggs often find that there

View File

@ -4,5 +4,15 @@ module Jekyll
# Custom exclusion hook: flags exactly one path for exclusion.
#
# filepath - path of the file being considered
#
# Returns true only when filepath equals 'excluded-from-hook.html'.
def self.hook_should_be_excluded?(filepath)
  excluded_path = 'excluded-from-hook.html'
  filepath == excluded_path
end
# Custom per-record hook: marks the record so callers can tell it went
# through this hook.
#
# record - the record hash; it is modified in place
# _node  - unused
#
# Returns the same record object, with :added_through_each set to true.
def self.hook_before_indexing_each(record, _node)
  record.tap { |marked| marked[:added_through_each] = true }
end
# Custom global hook: appends one extra record after all the others.
#
# records - the list of records; it is modified in place
#
# Returns the same list, with { name: 'Last one' } added at the end.
def self.hook_before_indexing_all(records)
  trailing_record = { name: 'Last one' }
  records << trailing_record
  records
end
end
end

View File

@ -3,6 +3,9 @@ title: "Test post"
tags:
- tag
- another tag
categories:
- foo
- bar
custom: Foo
---

View File

@ -1,9 +1,5 @@
---
title: "Test post again"
categories:
- foo
- bar
tags: foo bar
---
The goal of this post is simply to trigger pagination, and see that we do not

View File

@ -1,7 +1,11 @@
---
title: About
author: Myself
custom1: foo
custom2: bar
customList:
- foo
- bar
---
# Heading 1

View File

@ -11,5 +11,3 @@ title: Only divs
<div>This is the fourth paragraph</div>
<div>This is the fifth paragraph</div>
<div>This is the last paragraph</div>

View File

@ -19,457 +19,6 @@ describe(AlgoliaSearchRecordExtractor) do
mock_logger
end
# The record type reported for the page, post and document fixtures.
describe 'type' do
  it 'should recognize a page' do
    expect(fixture_page.type).to eq 'page'
  end

  it 'should recognize a post' do
    expect(fixture_post.type).to eq 'post'
  end

  it 'should recognize a document' do
    expect(fixture_document.type).to eq 'document'
  end
end
# The url reported for the page, post and document fixtures.
describe 'url' do
  it 'should use the page url' do
    expect(fixture_page.url).to eq '/about.html'
  end

  it 'should use the post url' do
    expect(fixture_post.url).to eq '/2015/07/02/test-post.html'
  end

  it 'should use the document url' do
    expect(fixture_document.url).to eq '/my-collection/collection-item.html'
  end
end
# The title reported for the page, post and document fixtures.
describe 'title' do
  it 'should use the page title' do
    expect(fixture_page.title).to eq 'About page'
  end

  it 'should use the post title' do
    expect(fixture_post.title).to eq 'Test post'
  end

  it 'should use the document title' do
    expect(fixture_document.title).to eq 'Collection Item'
  end
end
# The slug reported for the page, post and document fixtures.
describe 'slug' do
  it 'should get it for a page' do
    expect(fixture_page.slug).to eq 'about'
  end

  it 'should get it for a post' do
    expect(fixture_post.slug).to eq 'test-post'
  end

  it 'should get it for a document' do
    expect(fixture_document.slug).to eq 'collection-item'
  end
end
# Tags reported for each fixture, including tags that are objects responding
# to to_s rather than plain strings.
describe 'tags' do
  it 'should get tags from page' do
    expect(fixture_page.tags).to include('tag', 'another tag')
  end

  it 'should get tags from post' do
    expect(fixture_post.tags).to include('tag', 'another tag')
  end

  it 'should get tags from document' do
    expect(fixture_document.tags).to include('tag', 'another tag')
  end

  it 'should handle custom extended tags' do
    # Given: tags replaced by doubles that only know how to print themselves
    post = fixture_post
    post.file.data['tags'] =
      ['extended tag', 'extended another tag'].map do |label|
        double('Extended Tag', to_s: label)
      end

    # Then
    expect(post.tags).to include('extended tag', 'extended another tag')
  end
end
# Dates are expected as integer timestamps for posts and nil for pages; the
# expected timestamp changes with the site's configured timezone.
describe 'date' do
  it 'should get the date as a timestamp for posts' do
    expect(fixture_post.date).to eq 1_435_788_000
  end

  it 'should be nil for pages' do
    expect(fixture_page.date).to eq nil
  end

  it 'should generate the timestamp relative to the configured timezone' do
    # Given: the same post, read from a site configured for New York
    new_york_site = get_site(timezone: 'America/New_York')
    post = extractor.new(new_york_site.file_by_name('test-post.md'))

    # Then
    expect(post.date).to eq 1_435_809_600
  end
end
# Only the document fixture is expected to report a collection name.
describe 'collection' do
  it 'should get the collection name for documents' do
    expect(fixture_document.collection).to eq 'my-collection'
  end

  it 'should be nil for pages' do
    expect(fixture_page.collection).to eq nil
  end

  it 'should be nil for posts' do
    expect(fixture_post.collection).to eq nil
  end
end
# Front matter is expected as a symbol-keyed hash of custom data, without the
# keys that have their own dedicated readers.
describe 'front_matter' do
  it 'should get a hash of all front matter data' do
    data = fixture_front_matter.front_matter

    expect(data[:author]).to eq 'John Doe'
    expect(data[:custom]).to eq 'foo'
  end

  it 'should remove known keys from the front-matter' do
    data = fixture_front_matter.front_matter

    %i[title tags slug url date type].each do |known_key|
      expect(data[known_key]).to eq nil
    end
  end

  it 'should cast keys as symbols' do
    data = fixture_front_matter.front_matter

    # String keys must be absent, their symbol counterparts present
    %w[custom author].each do |key_name|
      expect(data[key_name]).to eq nil
      expect(data[key_name.to_sym]).to_not eq nil
    end
  end
end
# End-to-end expectations on the records returned by #extract: how many there
# are, which keys each one carries and which keys must be left out.
describe 'extract' do
  it 'should get one item per node' do
    records = fixture_only_paragraphs.extract

    expect(records.size).to eq 6
  end

  it 'should get a complete record' do
    record = fixture_page.extract[0]

    # Anchor generated by Jekyll from the heading
    expect(record[:anchor]).to eq 'heading-1'
    # Pages have no date
    expect(record[:date]).to eq nil
    # Only the first hierarchy level is filled
    expect(record[:hierarchy][:lvl0]).to eq 'Heading 1'
    expect(record[:hierarchy][:lvl1]).to eq nil
    # Content of the node itself
    expect(record[:tag_name]).to eq 'p'
    expect(record[:html]).to eq '<p>Text 1</p>'
    expect(record[:text]).to eq 'Text 1'
    # Page-level information
    expect(record[:title]).to eq 'About page'
    expect(record[:slug]).to eq 'about'
    expect(record[:url]).to eq '/about.html'
    # Tags
    expect(record[:tags]).to eq ['tag', 'another tag']
    # Weight
    expect(record[:weight][:heading]).to eq 90
    expect(record[:weight][:position]).to eq 0
  end

  it 'should allow overriding the node selector' do
    # Given: a site configured to index <div> nodes
    div_site = get_site(algolia: { 'nodes_to_index' => 'div' })
    divs_only = extractor.new(div_site.file_by_name('only-divs.md'))

    # Then
    expect(divs_only.extract.size).to eq 6
  end

  it 'should contain all the basic top level info' do
    top_level_keys = %i[date slug tags title url type]

    # Given: every top-level reader is stubbed to return "mock_<name>"
    input = fixture_page
    top_level_keys.each do |key|
      allow(input).to receive(key) { "mock_#{key}" }
    end

    # When
    record = input.extract[0]

    # Then
    top_level_keys.each do |key|
      expect(record[key]).to eq "mock_#{key}"
    end
  end

  it 'should add node data from extractor' do
    # Given
    input = fixture_page
    allow(input).to receive(:hierarchy_nodes) { [{ name: 'foo' }, { name: 'bar' }] }

    # Then
    expect(input.extract[0][:name]).to eq 'foo'
  end

  it 'should not expose the HTML node' do
    records = fixture_only_paragraphs.extract

    expect(records[0][:node]).to eq nil
  end

  it 'should set the objectID as a hash' do
    record = fixture_page.extract[0]

    expect(record).not_to have_key(:uuid)
    expect(record).to have_key(:objectID)
  end

  it 'should not contain a collection key for pages' do
    expect(fixture_page.extract[0]).not_to have_key(:collection)
  end

  it 'should not contain a collection key for posts' do
    expect(fixture_post.extract[0]).not_to have_key(:collection)
  end

  it 'should contain the collection name for documents' do
    expect(fixture_document.extract[0][:collection]).to eq 'my-collection'
  end

  it 'should not contain a date key for pages' do
    expect(fixture_page.extract[0]).not_to have_key(:date)
  end
end
describe 'custom_hook_each' do
it 'should be called on every item' do
# Given