2015-08-22 14:29:41 +02:00

188 lines
5.9 KiB
Python

# -*- coding: utf-8 -*-
"""
***************************************************************************
parsing.py
---------------------
Copyright : (C) 2013 by CS Systemes d'information (CS SI)
Email : otb at c-s dot fr (CS SI)
Contributors : Julien Malik (CS SI)
Oscar Picas (CS SI)
***************************************************************************
* *
* This program is free software; you can redistribute it and/or modify *
* it under the terms of the GNU General Public License as published by *
* the Free Software Foundation; either version 2 of the License, or *
* (at your option) any later version. *
* *
***************************************************************************
"""
__author__ = 'Julien Malik, Oscar Picas'
__copyright__ = '(C) 2013, CS Systemes d\'information (CS SI)'
from collections import namedtuple
import re
def merge_pairs(list, should_merge, merge):
"""
Merges adjacent elements of list using the function merge
if they satisfy the predicate should_merge.
"""
ret = []
i = 0
while i < len(list) - 1:
a = list[i]
b = list[i + 1]
if should_merge(a, b):
ret.append(merge(a, b))
i += 2
else:
ret.append(a)
i += 1
if i == len(list) - 1:
ret.append(list[i])
return ret
QuotedString = namedtuple('QuotedString', 'contents comments')
_Arg = namedtuple('Arg', 'contents comments')
_Command = namedtuple('Command', 'name body comment')
BlankLine = namedtuple('BlankLine', '')
class File(list):
def __repr__(self):
return 'File(' + repr(list(self)) + ')'
class Comment(str):
def __repr__(self):
return 'Comment(' + unicode(self) + ')'
def Arg(contents, comments=None):
return _Arg(contents, comments or [])
def Command(name, body, comment=None):
return _Command(name, body, comment)
class CMakeParseError(Exception):
pass
def prettify(s):
"""
Returns the pretty-print of the contents of a CMakeLists file.
"""
return unicode(parse(s))
def parse(s):
'''
Parses a string s in CMakeLists format whose
contents are assumed to have come from the
file at the given path.
'''
nums_toks = tokenize(s)
nums_items = list(parse_file(nums_toks))
nums_items = attach_comments_to_commands(nums_items)
items = [item for _, item in nums_items]
return File(items)
def parse_file(toks):
'''
Yields line number ranges and top-level elements of the syntax tree for
a CMakeLists file, given a generator of tokens from the file.
toks must really be a generator, not a list, for this to work.
'''
prev_type = 'newline'
for line_num, (typ, tok_contents) in toks:
if typ == 'comment':
yield ([line_num], Comment(tok_contents))
elif typ == 'newline' and prev_type == 'newline':
yield ([line_num], BlankLine())
elif typ == 'word':
line_nums, cmd = parse_command(line_num, tok_contents, toks)
yield (line_nums, cmd)
prev_type = typ
def attach_comments_to_commands(nodes):
return merge_pairs(nodes, command_then_comment, attach_comment_to_command)
def command_then_comment(a, b):
line_nums_a, thing_a = a
line_nums_b, thing_b = b
return (isinstance(thing_a, _Command) and
isinstance(thing_b, Comment) and
set(line_nums_a).intersection(line_nums_b))
def attach_comment_to_command(lnums_command, lnums_comment):
command_lines, command = lnums_command
_, comment = lnums_comment
return command_lines, Command(command.name, command.body[:], comment)
def parse_command(start_line_num, command_name, toks):
cmd = Command(name=command_name, body=[], comment=None)
expect('left paren', toks)
for line_num, (typ, tok_contents) in toks:
if typ == 'right paren':
line_nums = range(start_line_num, line_num + 1)
return line_nums, cmd
elif typ == 'left paren':
raise ValueError('Unexpected left paren at line %s' % line_num)
elif typ in ('word', 'string'):
cmd.body.append(Arg(tok_contents, []))
elif typ == 'comment':
c = tok_contents
if cmd.body:
cmd.body[-1].comments.append(c)
else:
cmd.comments.append(c)
msg = 'File ended while processing command "%s" started at line %s' % (
command_name, start_line_num)
raise CMakeParseError(msg)
def expect(expected_type, toks):
line_num, (typ, tok_contents) = toks.next()
if typ != expected_type:
msg = 'Expected a %s, but got "%s" at line %s' % (
expected_type, tok_contents, line_num)
raise CMakeParseError(msg)
# http://stackoverflow.com/questions/691148/pythonic-way-to-implement-a-tokenizer
scanner = re.Scanner([
(r'#.*', lambda scanner, token: ("comment", token)),
(r'"[^"]*"', lambda scanner, token: ("string", token)),
(r"\(", lambda scanner, token: ("left paren", token)),
(r"\)", lambda scanner, token: ("right paren", token)),
(r'[^ \t\r\n()#"]+', lambda scanner, token: ("word", token)),
(r'\n', lambda scanner, token: ("newline", token)),
(r"\s+", None), # skip other whitespace
])
def tokenize(s):
"""
Yields pairs of the form (line_num, (token_type, token_contents))
given a string containing the contents of a CMakeLists file.
"""
toks, remainder = scanner.scan(s)
line_num = 1
if remainder != '':
msg = 'Unrecognized tokens at line %s: %s' % (line_num, remainder)
raise ValueError(msg)
for tok_type, tok_contents in toks:
yield line_num, (tok_type, tok_contents.strip())
line_num += tok_contents.count('\n')