Replace hand-coded lexer with faster hacky lexer.

This commit is contained in:
Tristan Hume 2013-07-24 11:41:47 -04:00
parent 2332d86156
commit ee14775f83
3 changed files with 5 additions and 63 deletions

View File

@ -47,7 +47,6 @@ end
require "liquid/version"
require 'liquid/parser'
require 'liquid/lexer'
require 'liquid/drop'
require 'liquid/extensions'
require 'liquid/errors'

View File

@ -1,61 +0,0 @@
module Liquid
  # Hand-rolled character-by-character lexer for Liquid expressions.
  # Produces a flat token stream: identifiers come back as Strings,
  # punctuation as Symbols (:pipe, :dot, :colon, :comma).
  class Lexer
    # Single-character punctuation tokens and their symbolic names.
    SPECIALS = {
      '|' => :pipe,
      '.' => :dot,
      ':' => :colon,
      ',' => :comma
    }.freeze

    # Tokenizes +input+ and returns an Array of tokens.
    #
    # input - the String to lex.
    #
    # Returns an Array of Strings (identifiers) and Symbols (specials).
    # Unrecognized characters are skipped; the previous version made no
    # progress on them and looped forever (e.g. on "a@b").
    def tokenize(input)
      @p = 0
      @output = []
      @input = input.chars.to_a
      loop do
        consume_whitespace
        c = @input[@p]
        return @output unless c
        if identifier?(c)
          @output << consume_identifier
        elsif s = SPECIALS[c]
          @output << s
          @p += 1
        else
          # Unknown character: consume it so the loop always advances.
          @p += 1
        end
      end
    end

    # Ad-hoc micro-benchmark pitting this lexer against a regex-split
    # tokenizer over a representative expression.
    def benchmark
      require 'benchmark'
      s = "bob.hello | filter: lol, troll"
      Benchmark.bmbm do |x|
        x.report('c') { 100_000.times { tokenize(s) }}
        x.report('r') { 100_000.times { s.split(/\b/).map {|y| y.strip} }}
      end
    end

    # True if +c+ is a word character or hyphen (identifier constituent).
    def identifier?(c)
      c =~ /^[\w\-]$/
    end

    # True if +c+ is a whitespace character.
    def whitespace?(c)
      c =~ /^\s$/
    end

    # Advances the pointer past any run of whitespace.
    def consume_whitespace
      while whitespace?(@input[@p])
        @p += 1
      end
    end

    # Reads a maximal identifier starting at the current pointer and
    # returns it as a String; leaves the pointer on the next character.
    def consume_identifier
      str = ""
      while identifier?(@input[@p])
        str << @input[@p]
        @p += 1
      end
      str
    end
  end
end

View File

@ -3,8 +3,12 @@ module Liquid
# it provides helpers and encapsulates state
# Parser for Liquid markup: eagerly tokenizes its input with a fast
# regex split and keeps a pointer into the resulting token stream.
class Parser
  # input - the raw expression String to parse.
  def initialize(input)
    @input = input
    @tokens = tokenize(input)
    @p = 0 # pointer to current location
  end

  # Splits +input+ on word boundaries into stripped tokens.
  #
  # Returns an Array of non-empty Strings. Runs of two or more
  # whitespace characters used to yield empty-string tokens after
  # strip (e.g. "a  b" -> ["a", "", "b"]); those are now dropped so
  # the stream contains only real tokens.
  def tokenize(input)
    input.split(/\b/).map {|tok| tok.strip}.reject(&:empty?)
  end
end
end