Adding tests for extractor run

2017-11-10 19:31:52 +01:00 · 2017-11-10 19:31:52 +01:00 · a437f15a24
commit a437f15a24
parent 7ef8296e6f
10 changed files with 208 additions and 510 deletions
--- a/lib/jekyll/algolia/extractor.rb
+++ b/lib/jekyll/algolia/extractor.rb
@ -25,7 +25,7 @@ module Jekyll
          record.delete(:node)

          # Merging each record info with file info
-          record = record.merge(shared_metadata).compact
+          record = Utils.compact_empty(record.merge(shared_metadata))

          # Apply custom user-defined hooks
          record = Jekyll::Algolia.hook_before_indexing_each(record, node)
@ -53,10 +53,6 @@ module Jekyll
          }
        ).extract
      end
-
-      def self.convert_to_json(raw_record)
-        raw_record
-      end
    end
  end
 end
--- a/spec/jekyll/algolia/extractor_spec.rb
+++ b/spec/jekyll/algolia/extractor_spec.rb
@ -2,39 +2,168 @@
 require 'spec_helper'

 describe(Jekyll::Algolia::Extractor) do
+  let(:configurator) { Jekyll::Algolia::Configurator }
+  let(:filebrowser) { Jekyll::Algolia::FileBrowser }
  let(:current) { Jekyll::Algolia::Extractor }
+  let(:site) { init_new_jekyll_site }
+
+  # Suppress Jekyll log about reading the config file
+  before do
+    allow(Jekyll.logger).to receive(:info)
+  end

  describe '.extract_raw_records' do
-    # Given
-    let(:content) { 'some html markup' }
-    let(:html_extractor) { double('AlgoliaHTMLExtractor', extract: nil) }
-    let(:configurator) { Jekyll::Algolia::Configurator }
-    before do
-      allow(AlgoliaHTMLExtractor)
-        .to receive(:new)
-        .and_return(html_extractor)
-    end
-    before do
-      allow(configurator)
-        .to receive(:algolia)
-        .with('nodes_to_index')
-        .and_return('foo')
+    context 'with mock data' do
+      # Given
+      let(:content) { 'some html markup' }
+      let(:html_extractor) { double('AlgoliaHTMLExtractor', extract: nil) }
+      before do
+        allow(AlgoliaHTMLExtractor)
+          .to receive(:new)
+          .and_return(html_extractor)
+      end
+      before do
+        allow(configurator)
+          .to receive(:algolia)
+          .with('nodes_to_index')
+          .and_return('foo')
+      end
+
+      # When
+      before { current.extract_raw_records(content) }
+
+      # Then
+      it 'should create a new AlgoliaHTMLExtractor with the content passed' do
+        expect(AlgoliaHTMLExtractor)
+          .to have_received(:new)
+          .with(content, anything)
+      end
+      it 'should configure the extractor with the nodex_to_index config value' do
+        expect(AlgoliaHTMLExtractor)
+          .to have_received(:new)
+          .with(anything, options: { css_selector: 'foo' })
+      end
+      it { expect(html_extractor).to have_received(:extract) }
    end

-    # When
-    before { current.extract_raw_records(content) }
+    context 'with real data' do
+      let(:site) { init_new_jekyll_site }
+      subject { current.extract_raw_records(content) }

-    # Then
-    it 'should create a new AlgoliaHTMLExtractor with the content passed' do
-      expect(AlgoliaHTMLExtractor)
-        .to have_received(:new)
-        .with(content, anything)
+      context 'with a page' do
+        let(:content) { site.__find_file('only-paragraphs.md').content }
+        it { expect(subject.length).to eq 6 }
+      end
+      context 'with a page with divs' do
+        let(:content) { site.__find_file('only-divs.md').content }
+        before do
+          allow(configurator)
+            .to receive(:algolia)
+            .with('nodes_to_index')
+            .and_return('div')
+        end
+        it { expect(subject.length).to eq 5 }
+      end
    end
-    it 'should configure the extractor with the nodex_to_index config value' do
-      expect(AlgoliaHTMLExtractor)
-        .to have_received(:new)
-        .with(anything, options: { css_selector: 'foo' })
+  end
+
+  describe '.run' do
+    subject { current.run(file) }
+
+    context 'with mock data' do
+      let!(:file) { site.__find_file('html.html') }
+      before do
+        allow(Jekyll::Algolia)
+          .to receive(:hook_before_indexing_all)
+            .with(anything) { |input| input }
+
+        allow(Jekyll::Algolia)
+          .to receive(:hook_before_indexing_each)
+            .with(anything, anything) { |input| input }
+
+        allow(current)
+          .to receive(:extract_raw_records)
+          .and_return(raw_records)
+
+        allow(filebrowser)
+          .to receive(:metadata)
+          .and_return(metadata)
+      end
+      let(:raw_records) { [{}] }
+      let(:metadata) { {} }
+
+      describe 'should have one record per element extracted' do
+        let(:raw_records) { [{ foo: 'bar' }, { baz: 'foo' }] }
+        it { expect(subject.length).to eq 2 }
+      end
+
+      describe 'should all have the same common shared data' do
+        let(:raw_records) { [{ foo: 'bar' }, { baz: 'foo' }] }
+        let(:metadata) { { foo: 'bar' } }
+        it { expect(subject[0]).to include(foo: 'bar') }
+        it { expect(subject[1]).to include(foo: 'bar') }
+      end
+
+      describe 'should not contain a :node key' do
+        let(:raw_records) { [{ foo: 'bar', node: 'nope' }] }
+        it { expect(subject[0]).to_not include(:node) }
+      end
+
+      describe 'should not contain any nil or empty array keys' do
+        let(:raw_records) { [{ foo: nil, bar: [] }] }
+        let(:metadata) { { baz: nil, yep: 'yep' } }
+        it do
+          expect(subject[0]).to_not include(:foo)
+          expect(subject[0]).to_not include(:bar)
+          expect(subject[0]).to_not include(:baz)
+          expect(subject[0]).to include(yep: 'yep')
+        end
+      end
+
+      context 'with a custom hook on each' do
+        before do
+          allow(Jekyll::Algolia)
+            .to receive(:hook_before_indexing_each)
+            .and_return(hook_each_value)
+        end
+
+        context 'when updating the value' do
+          let(:metadata) { { foo: 'bar' } }
+          let(:hook_each_value) { { new_foo: 'new_bar' } }
+          it { expect(subject[0]).to include(new_foo: 'new_bar') }
+        end
+
+        context 'when returning nil from the hook' do
+          let(:hook_each_value) { nil }
+          it { should be_empty }
+        end
+      end
+
+      context 'with a custom hook on all' do
+        before do
+          allow(Jekyll::Algolia)
+            .to receive(:hook_before_indexing_all)
+            .and_return(%w[foo bar baz])
+        end
+        it { should eq %w[foo bar baz] }
+      end
+    end
+
+    context 'with real data' do
+      let(:file) { site.__find_file('about.md') }
+
+      describe 'should add a new key to each record' do
+        it do
+          expect(subject[0]).to include(added_through_each: true)
+          expect(subject[1]).to include(added_through_each: true)
+          expect(subject[2]).to include(added_through_each: true)
+        end
+      end
+
+      describe 'should add new record at the end' do
+        let(:expected) { { name: 'Last one' } }
+        it { expect(subject[-1]).to eq expected }
+      end
    end
-    it { expect(html_extractor).to have_received(:extract) }
  end
 end
--- a/spec/jekyll/algolia/file_browser_spec.rb
+++ b/spec/jekyll/algolia/file_browser_spec.rb
@ -183,8 +183,8 @@ describe(Jekyll::Algolia::FileBrowser) do
      it { should eq '/about.html' }
    end
    context 'with a post' do
-      let(:file) { site.__find_file('_posts/2015-07-02-test-post.md') }
-      it { should eq '/2015/07/02/test-post.html' }
+      let(:file) { site.__find_file('-test-post-again.md') }
+      it { should eq '/2015/07/03/test-post-again.html' }
    end
    context 'with a collection element' do
      let(:file) { site.__find_file('_my-collection/collection-item.html') }
@ -295,25 +295,32 @@ describe(Jekyll::Algolia::FileBrowser) do

    context 'with a page' do
      let(:file) { site.__find_file('about.md') }
-      it { should include(title: 'About') }
-      it { should include(custom1: 'foo') }
-      it { should include(custom2: 'bar') }
+      it do
+        should include(title: 'About')
+        should include(custom1: 'foo')
+        should include(custom2: 'bar')
+        should include(customList: %w[foo bar])
+      end
    end
    context 'with a post' do
-      let(:file) { site.__find_file('-test-post-again.md') }
-      it { should include(title: 'Test post again') }
-      it { should include(categories: %w[foo bar]) }
-      it { should include(tags: %w[foo bar]) }
-      it { should include(draft: false) }
-      it { should include(ext: '.md') }
+      let(:file) { site.__find_file('-test-post.md') }
+      it do
+        should include(title: 'Test post')
+        should include(categories: %w[foo bar])
+        should include(tags: ['tag', 'another tag'])
+        should include(draft: false)
+        should include(ext: '.md')
+      end
    end
    context 'with a collection item' do
      let(:file) { site.__find_file('collection-item.html') }
-      it { should include(title: 'Collection Item') }
-      it { should include(categories: []) }
-      it { should include(tags: []) }
-      it { should include(draft: false) }
-      it { should include(ext: '.html') }
+      it do
+        should include(title: 'Collection Item')
+        should include(categories: [])
+        should include(tags: [])
+        should include(draft: false)
+        should include(ext: '.html')
+      end
    end

    describe 'should not have modified the inner data' do
@ -323,10 +330,12 @@ describe(Jekyll::Algolia::FileBrowser) do
    end
    describe 'should not contain keys where we have defined getters' do
      let(:file) { site.__find_file('html.html') }
-      it { should_not include(:slug) }
-      it { should_not include(:type) }
-      it { should_not include(:url) }
-      it { should_not include(:date) }
+      it do
+        should_not include(:slug)
+        should_not include(:type)
+        should_not include(:url)
+        should_not include(:date)
+      end
    end
    describe 'should not contain some specific keys' do
      let(:file) { site.__find_file('html.html') }
@ -385,6 +394,7 @@ describe(Jekyll::Algolia::FileBrowser) do
    context 'with real data' do
      context 'with a page' do
        let(:file) { site.__find_file('about.md') }
+        it { should include(author: 'Myself') }
        it { should_not include(:collection) }
        it { should_not include(:date) }
        it { should include(slug: 'about') }
@ -398,13 +408,14 @@ describe(Jekyll::Algolia::FileBrowser) do
      context 'with a post' do
        let(:file) { site.__find_file('-test-post.md') }
        it { should_not include(:collection) }
+        it { should include(categories: %w[foo bar]) }
        it { should include(date: 1_435_788_000) }
        it { should include(ext: '.md') }
        it { should include(slug: 'test-post') }
        it { should include(tags: ['tag', 'another tag']) }
        it { should include(type: 'post') }
        it { should include(title: 'Test post') }
-        it { should include(url: '/2015/07/02/test-post.html') }
+        it { should include(url: '/foo/bar/2015/07/02/test-post.html') }
      end
      context 'with a collection document' do
        let(:file) { site.__find_file('collection-item.html') }
--- a/spec/site/_my-collection/collection-item.md
+++ b/spec/site/_my-collection/collection-item.md
@ -4,7 +4,9 @@ date: 04-05-1984
 categories: 
  - foo
  - bar
-tags: foo bar
+tags: 
+  - foo
+  - bar
 ---

 The grandest of omelettes. Those that feast on dragon eggs often find that there
--- a/spec/site/_plugins/algolia.rb
+++ b/spec/site/_plugins/algolia.rb
@ -4,5 +4,15 @@ module Jekyll
    def self.hook_should_be_excluded?(filepath)
      filepath == 'excluded-from-hook.html'
    end
+    def self.hook_before_indexing_each(record, _node)
+      record[:added_through_each] = true
+      record
+    end
+    def self.hook_before_indexing_all(records)
+      records << {
+        name: 'Last one'
+      }
+      records
+    end
  end
 end
--- a/spec/site/_posts/2015-07-02-test-post.md
+++ b/spec/site/_posts/2015-07-02-test-post.md
@ -3,6 +3,9 @@ title: "Test post"
 tags:
  - tag
  - another tag
+categories:
+  - foo
+  - bar
 custom: Foo
 ---

--- a/spec/site/_posts/2015-07-03-test-post-again.md
+++ b/spec/site/_posts/2015-07-03-test-post-again.md
@ -1,9 +1,5 @@
 ---
 title: "Test post again"
-categories: 
-  - foo
-  - bar
-tags: foo bar
 ---

 The goal of this post is simply to trigger pagination, and see that we do not
--- a/spec/site/about.md
+++ b/spec/site/about.md
@ -1,7 +1,11 @@
 ---
 title: About
+author: Myself
 custom1: foo
 custom2: bar
+customList:
+  - foo
+  - bar
 ---

 # Heading 1
--- a/spec/site/only-divs.md
+++ b/spec/site/only-divs.md
@ -11,5 +11,3 @@ title: Only divs
 <div>This is the fourth paragraph</div>

 <div>This is the fifth paragraph</div>
-
-<div>This is the last paragraph</div>
--- a/spec_old/record_extractor_spec.rb
+++ b/spec_old/record_extractor_spec.rb
@ -19,457 +19,6 @@ describe(AlgoliaSearchRecordExtractor) do
    mock_logger
  end

-  describe 'type' do
-    it 'should recognize a page' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.type
-
-      expect(actual).to eq 'page'
-    end
-
-    it 'should recognize a post' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.type
-
-      expect(actual).to eq 'post'
-    end
-
-    it 'should recognize a document' do
-      # Given
-      input = fixture_document
-
-      # When
-      actual = input.type
-
-      expect(actual).to eq 'document'
-    end
-  end
-
-  describe 'url' do
-    it 'should use the page url' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.url
-
-      expect(actual).to eq '/about.html'
-    end
-
-    it 'should use the post url' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.url
-
-      expect(actual).to eq '/2015/07/02/test-post.html'
-    end
-
-    it 'should use the document url' do
-      # Given
-      input = fixture_document
-
-      # When
-      actual = input.url
-
-      expect(actual).to eq '/my-collection/collection-item.html'
-    end
-  end
-
-  describe 'title' do
-    it 'should use the page title' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.title
-
-      expect(actual).to eq 'About page'
-    end
-
-    it 'should use the post title' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.title
-
-      expect(actual).to eq 'Test post'
-    end
-
-    it 'should use the document title' do
-      # Given
-      input = fixture_document
-
-      # When
-      actual = input.title
-
-      expect(actual).to eq 'Collection Item'
-    end
-  end
-
-  describe 'slug' do
-    it 'should get it for a page' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.slug
-
-      expect(actual).to eq 'about'
-    end
-
-    it 'should get it for a post' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.slug
-
-      expect(actual).to eq 'test-post'
-    end
-
-    it 'should get it for a document' do
-      # Given
-      input = fixture_document
-
-      # When
-      actual = input.slug
-
-      expect(actual).to eq 'collection-item'
-    end
-  end
-
-  describe 'tags' do
-    it 'should get tags from page' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.tags
-
-      expect(actual).to include('tag', 'another tag')
-    end
-
-    it 'should get tags from post' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.tags
-
-      expect(actual).to include('tag', 'another tag')
-    end
-
-    it 'should get tags from document' do
-      # Given
-      input = fixture_document
-
-      # When
-      actual = input.tags
-
-      expect(actual).to include('tag', 'another tag')
-    end
-
-    it 'should handle custom extended tags' do
-      # Given
-      extended_tags = [
-        double('Extended Tag', to_s: 'extended tag'),
-        double('Extended Tag', to_s: 'extended another tag')
-      ]
-      input = fixture_post
-      input.file.data['tags'] = extended_tags
-
-      # When
-      actual = input.tags
-
-      expect(actual).to include('extended tag', 'extended another tag')
-    end
-  end
-
-  describe 'date' do
-    it 'should get the date as a timestamp for posts' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.date
-
-      # Then
-      expect(actual).to eq 1_435_788_000
-    end
-
-    it 'should be nil for pages' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.date
-
-      # Then
-      expect(actual).to eq nil
-    end
-
-    it 'should generate the timestamp relative to the configured timezone' do
-      # Given
-      site = get_site(timezone: 'America/New_York')
-      input = extractor.new(site.file_by_name('test-post.md'))
-
-      # When
-      actual = input.date
-
-      # Then
-      expect(actual).to eq 1_435_809_600
-    end
-  end
-
-  describe 'collection' do
-    it 'should get the collection name for documents' do
-      # Given
-      input = fixture_document
-
-      # When
-      actual = input.collection
-
-      # Then
-      expect(actual).to eq 'my-collection'
-    end
-
-    it 'should be nil for pages' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.collection
-
-      # Then
-      expect(actual).to eq nil
-    end
-
-    it 'should be nil for posts' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.collection
-
-      # Then
-      expect(actual).to eq nil
-    end
-  end
-
-  describe 'front_matter' do
-    it 'should get a hash of all front matter data' do
-      # Given
-      input = fixture_front_matter
-
-      # When
-      actual = input.front_matter
-
-      # Then
-      expect(actual[:author]).to eq 'John Doe'
-      expect(actual[:custom]).to eq 'foo'
-    end
-
-    it 'should remove known keys from the front-matter' do
-      # Given
-      input = fixture_front_matter
-
-      # When
-      actual = input.front_matter
-
-      # Then
-      expect(actual[:title]).to eq nil
-      expect(actual[:tags]).to eq nil
-      expect(actual[:slug]).to eq nil
-      expect(actual[:url]).to eq nil
-      expect(actual[:date]).to eq nil
-      expect(actual[:type]).to eq nil
-    end
-
-    it 'should cast keys as symbols' do
-      # Given
-      input = fixture_front_matter
-
-      # When
-      actual = input.front_matter
-
-      # Then
-      expect(actual['custom']).to eq nil
-      expect(actual[:custom]).to_not eq nil
-      expect(actual['author']).to eq nil
-      expect(actual[:author]).to_not eq nil
-    end
-  end
-
-  describe 'extract' do
-    it 'should get one item per node' do
-      # Given
-      input = fixture_only_paragraphs
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual.size).to eq 6
-    end
-
-    it 'should get a complete record' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.extract
-
-      # Then
-      # Jekyll auto-generates anchors on heading
-      expect(actual[0][:anchor]).to eq 'heading-1'
-      # It's a page, so no date
-      expect(actual[0][:date]).to eq nil
-      # Hierarchy on first level
-      expect(actual[0][:hierarchy][:lvl0]).to eq 'Heading 1'
-      expect(actual[0][:hierarchy][:lvl1]).to eq nil
-      # Node content
-      expect(actual[0][:tag_name]).to eq 'p'
-      expect(actual[0][:html]).to eq '<p>Text 1</p>'
-      expect(actual[0][:text]).to eq 'Text 1'
-      # Page
-      expect(actual[0][:title]).to eq 'About page'
-      expect(actual[0][:slug]).to eq 'about'
-      expect(actual[0][:url]).to eq '/about.html'
-      # Tags
-      expect(actual[0][:tags]).to eq ['tag', 'another tag']
-      # Weight
-      expect(actual[0][:weight][:heading]).to eq 90
-      expect(actual[0][:weight][:position]).to eq 0
-    end
-
-    it 'should allow overriding the node selector' do
-      # Given
-      site = get_site(algolia: { 'nodes_to_index' => 'div' })
-      input = extractor.new(site.file_by_name('only-divs.md'))
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual.size).to eq 6
-    end
-
-    it 'should contain all the basic top level info' do
-      # Given
-      input = fixture_page
-      allow(input).to receive(:date) { 'mock_date' }
-      allow(input).to receive(:slug) { 'mock_slug' }
-      allow(input).to receive(:tags) { 'mock_tags' }
-      allow(input).to receive(:title) { 'mock_title' }
-      allow(input).to receive(:url) { 'mock_url' }
-      allow(input).to receive(:type) { 'mock_type' }
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0][:date]).to eq 'mock_date'
-      expect(actual[0][:slug]).to eq 'mock_slug'
-      expect(actual[0][:tags]).to eq 'mock_tags'
-      expect(actual[0][:title]).to eq 'mock_title'
-      expect(actual[0][:url]).to eq 'mock_url'
-      expect(actual[0][:type]).to eq 'mock_type'
-    end
-
-    it 'should add node data from extractor' do
-      # Given
-      input = fixture_page
-      allow(input).to receive(:hierarchy_nodes) do
-        [
-          { name: 'foo' },
-          { name: 'bar' }
-        ]
-      end
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0][:name]).to eq 'foo'
-    end
-
-    it 'should not expose the HTML node' do
-      # Given
-      input = fixture_only_paragraphs
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0][:node]).to eq nil
-    end
-
-    it 'should set the objectID as a hash' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0]).not_to have_key(:uuid)
-      expect(actual[0]).to have_key(:objectID)
-    end
-
-    it 'should not contain a collection key for pages' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0]).not_to have_key(:collection)
-    end
-
-    it 'should not contain a collection key for posts' do
-      # Given
-      input = fixture_post
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0]).not_to have_key(:collection)
-    end
-
-    it 'should contain the collection name for documents' do
-      # Given
-      page = fixture_document
-
-      # When
-      page_data = page.extract
-
-      # Then
-      expect(page_data[0][:collection]).to eq 'my-collection'
-    end
-
-    it 'should not contain a date key for pages' do
-      # Given
-      input = fixture_page
-
-      # When
-      actual = input.extract
-
-      # Then
-      expect(actual[0]).not_to have_key(:date)
-    end
-  end
-
  describe 'custom_hook_each' do
    it 'should be called on every item' do
      # Given