From ee14775f8350b8a571418e32c0922c3601c8fbf3 Mon Sep 17 00:00:00 2001 From: Tristan Hume Date: Wed, 24 Jul 2013 11:41:47 -0400 Subject: [PATCH] Replace hand-coded lexer with faster hacky lexer. --- lib/liquid.rb | 1 - lib/liquid/lexer.rb | 61 -------------------------------------------- lib/liquid/parser.rb | 6 ++++- 3 files changed, 5 insertions(+), 63 deletions(-) delete mode 100644 lib/liquid/lexer.rb diff --git a/lib/liquid.rb b/lib/liquid.rb index 6d2fe8a8..a377f95b 100644 --- a/lib/liquid.rb +++ b/lib/liquid.rb @@ -47,7 +47,6 @@ end require "liquid/version" require 'liquid/parser' -require 'liquid/lexer' require 'liquid/drop' require 'liquid/extensions' require 'liquid/errors' diff --git a/lib/liquid/lexer.rb b/lib/liquid/lexer.rb deleted file mode 100644 index eef9c9c4..00000000 --- a/lib/liquid/lexer.rb +++ /dev/null @@ -1,61 +0,0 @@ -module Liquid - class Lexer - SPECIALS = { - '|' => :pipe, - '.' => :dot, - ':' => :colon, - ',' => :comma - } - - def tokenize(input) - @p = 0 - @output = [] - @input = input.chars.to_a - - loop do - consume_whitespace - c = @input[@p] - return @output unless c - - if identifier?(c) - @output << consume_identifier - elsif s = SPECIALS[c] - @output << s - @p += 1 - end - end - end - - def benchmark - require 'benchmark' - s = "bob.hello | filter: lol, troll" - Benchmark.bmbm do |x| - x.report('c') { 100_000.times { tokenize(s) }} - x.report('r') { 100_000.times { s.split(/\b/).map {|y| y.strip} }} - end - end - - def identifier?(c) - c =~ /^[\w\-]$/ - end - - def whitespace?(c) - c =~ /^\s$/ - end - - def consume_whitespace - while whitespace?(@input[@p]) - @p += 1 - end - end - - def consume_identifier - str = "" - while identifier?(@input[@p]) - str << @input[@p] - @p += 1 - end - str - end - end -end diff --git a/lib/liquid/parser.rb b/lib/liquid/parser.rb index 47b57651..1b69330d 100644 --- a/lib/liquid/parser.rb +++ b/lib/liquid/parser.rb @@ -3,8 +3,12 @@ module Liquid # it provides helpers and encapsulates state class Parser def initialize(input) - @input = input + @tokens = tokenize(input) @p = 0 # pointer to current location end + + def tokenize(input) + input.split(/\b/).map {|tok| tok.strip} + end end end