improve performance parsing speed by about 6% by caching compiled regexes (#131)

* improve performance parsing speed by about 6% by caching compiled regexes
This commit is contained in:
Nigel Dokter 2023-04-12 10:52:11 +02:00 committed by GitHub
parent 9847bdf66b
commit 5a59c36646
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 17 additions and 22 deletions

View File

@ -1,5 +1,13 @@
import re
# - Match all characters after start of telegram except for the start
# itself again '[^\/]+', which eliminates incomplete preceding telegrams.
# - Do non greedy match using '?' so start is matched up to the first
# checksum that's found.
# - The checksum is optional '{0,4}' because not all telegram versions
# support it.
_FIND_TELEGRAMS_REGEX = re.compile(r"\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n", re.DOTALL)
class TelegramBuffer(object):
"""
@ -8,14 +16,14 @@ class TelegramBuffer(object):
"""
def __init__(self):
self._buffer = ''
self._buffer = ""
def get_all(self):
"""
Remove complete telegrams from buffer and yield them.
:rtype: generator
"""
for telegram in self._find_telegrams():
for telegram in _FIND_TELEGRAMS_REGEX.findall(self._buffer):
self._remove(telegram)
yield telegram
@ -37,21 +45,3 @@ class TelegramBuffer(object):
index = self._buffer.index(telegram) + len(telegram)
self._buffer = self._buffer[index:]
def _find_telegrams(self):
"""
Find complete telegrams in buffer from start ('/') till ending
checksum ('!AB12\r\n').
:rtype: list
"""
# - Match all characters after start of telegram except for the start
# itself again '[^\/]+', which eliminates incomplete preceding telegrams.
# - Do non greedy match using '?' so start is matched up to the first
# checksum that's found.
# - The checksum is optional '{0,4}' because not all telegram versions
# support it.
return re.findall(
r'\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n',
self._buffer,
re.DOTALL
)

View File

@ -25,8 +25,13 @@ class TelegramParser(object):
telegram DSMR version (v4 and up).
:type telegram_specification: dict
"""
self.telegram_specification = telegram_specification
self.apply_checksum_validation = apply_checksum_validation
self.telegram_specification = telegram_specification
# Regexes are compiled once to improve performance
self.telegram_specification_regexes = {
signature: re.compile(signature, re.DOTALL)
for signature in self.telegram_specification['objects'].keys()
}
def parse(self, telegram_data, encryption_key="", authentication_key=""): # noqa: C901
"""
@ -80,7 +85,7 @@ class TelegramParser(object):
telegram = Telegram()
for signature, parser in self.telegram_specification['objects'].items():
pattern = re.compile(signature, re.DOTALL)
pattern = self.telegram_specification_regexes[signature]
matches = pattern.findall(telegram_data)
# Some signatures are optional and may not be present,