Improve parsing speed by about 6% by caching compiled regexes (#131)
* Improve parsing speed by about 6% by caching compiled regexes
This commit is contained in:
parent
9847bdf66b
commit
5a59c36646
@ -1,5 +1,13 @@
|
||||
import re
|
||||
|
||||
# - Match all characters after start of telegram except for the start
|
||||
# itself again '[^\/]+', which eliminates incomplete preceding telegrams.
|
||||
# - Do a non-greedy match using '?' so the start is matched up to the first
|
||||
# checksum that's found.
|
||||
# - The checksum is optional '{0,4}' because not all telegram versions
|
||||
# support it.
|
||||
_FIND_TELEGRAMS_REGEX = re.compile(r"\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n", re.DOTALL)
|
||||
|
||||
|
||||
class TelegramBuffer(object):
|
||||
"""
|
||||
@ -8,14 +16,14 @@ class TelegramBuffer(object):
|
||||
"""
|
||||
|
||||
def __init__(self):
|
||||
self._buffer = ''
|
||||
self._buffer = ""
|
||||
|
||||
def get_all(self):
|
||||
"""
|
||||
Remove complete telegrams from buffer and yield them.
|
||||
:rtype generator:
|
||||
"""
|
||||
for telegram in self._find_telegrams():
|
||||
for telegram in _FIND_TELEGRAMS_REGEX.findall(self._buffer):
|
||||
self._remove(telegram)
|
||||
yield telegram
|
||||
|
||||
@ -37,21 +45,3 @@ class TelegramBuffer(object):
|
||||
index = self._buffer.index(telegram) + len(telegram)
|
||||
|
||||
self._buffer = self._buffer[index:]
|
||||
|
||||
def _find_telegrams(self):
|
||||
"""
|
||||
Find complete telegrams in buffer from start ('/') till ending
|
||||
checksum ('!AB12\r\n').
|
||||
:rtype: list
|
||||
"""
|
||||
# - Match all characters after start of telegram except for the start
|
||||
# itself again '[^\/]+', which eliminates incomplete preceding telegrams.
|
||||
# - Do a non-greedy match using '?' so the start is matched up to the first
|
||||
# checksum that's found.
|
||||
# - The checksum is optional '{0,4}' because not all telegram versions
|
||||
# support it.
|
||||
return re.findall(
|
||||
r'\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n',
|
||||
self._buffer,
|
||||
re.DOTALL
|
||||
)
|
||||
|
@ -25,8 +25,13 @@ class TelegramParser(object):
|
||||
telegram DSMR version (v4 and up).
|
||||
:type telegram_specification: dict
|
||||
"""
|
||||
self.telegram_specification = telegram_specification
|
||||
self.apply_checksum_validation = apply_checksum_validation
|
||||
self.telegram_specification = telegram_specification
|
||||
# Regexes are compiled once to improve performance
|
||||
self.telegram_specification_regexes = {
|
||||
signature: re.compile(signature, re.DOTALL)
|
||||
for signature in self.telegram_specification['objects'].keys()
|
||||
}
|
||||
|
||||
def parse(self, telegram_data, encryption_key="", authentication_key=""): # noqa: C901
|
||||
"""
|
||||
@ -80,7 +85,7 @@ class TelegramParser(object):
|
||||
telegram = Telegram()
|
||||
|
||||
for signature, parser in self.telegram_specification['objects'].items():
|
||||
pattern = re.compile(signature, re.DOTALL)
|
||||
pattern = self.telegram_specification_regexes[signature]
|
||||
matches = pattern.findall(telegram_data)
|
||||
|
||||
# Some signatures are optional and may not be present,
|
||||
|
Loading…
Reference in New Issue
Block a user