Improve parsing speed by about 6% by caching compiled regexes (#131)

* Improve parsing speed by about 6% by caching compiled regexes
2023-04-12 10:52:11 +02:00 · 2023-04-12 10:52:11 +02:00 · 5a59c36646
commit 5a59c36646
parent 9847bdf66b
2 changed files with 17 additions and 22 deletions
--- a/dsmr_parser/clients/telegram_buffer.py
+++ b/dsmr_parser/clients/telegram_buffer.py
@ -1,5 +1,13 @@
 import re
 # - Match all characters after start of telegram except for the start
 # itself again '[^\/]+', which eliminates incomplete preceding telegrams.
 # - Do non greedy match using '?' so start is matched up to the first
 # checksum that's found.
 # - The checksum is optional '{0,4}' because not all telegram versions
 # support it.
 _FIND_TELEGRAMS_REGEX = re.compile(r"\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n", re.DOTALL)
 class TelegramBuffer(object):
    """
@ -8,14 +16,14 @@ class TelegramBuffer(object):
    """
    def __init__(self):
-        self._buffer = ''
+        self._buffer = ""
    def get_all(self):
        """
        Remove complete telegrams from buffer and yield them.
        :rtype: generator
        """
-        for telegram in self._find_telegrams():
+        for telegram in _FIND_TELEGRAMS_REGEX.findall(self._buffer):
            self._remove(telegram)
            yield telegram
@ -37,21 +45,3 @@ class TelegramBuffer(object):
        index = self._buffer.index(telegram) + len(telegram)
        self._buffer = self._buffer[index:]
    def _find_telegrams(self):
        """
        Find complete telegrams in buffer from start ('/') till ending
        checksum ('!AB12\r\n').

        Only reads ``self._buffer``; the buffer itself is not modified here.

        :return: every non-overlapping match of the telegram pattern in
            ``self._buffer``, in the order found. The pattern has no capture
            groups, so each item is the full matched telegram text.
        :rtype: list
        """
        # - Match all characters after start of telegram except for the start
        # itself again '[^\/]+', which eliminates incomplete preceding telegrams.
        # - Do non greedy match using '?' so start is matched up to the first
        # checksum that's found.
        # - The checksum is optional '{0,4}' because not all telegram versions
        # support it.
        # - '\0?' accepts an optional NUL character between the checksum and
        # the closing '\r\n'.
        # - re.DOTALL is passed as the flags argument; the pattern contains no
        # '.', so the flag does not change what is matched here.
        return re.findall(
            r'\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n',
            self._buffer,
            re.DOTALL
        )
--- a/dsmr_parser/parsers.py
+++ b/dsmr_parser/parsers.py
@ -25,8 +25,13 @@ class TelegramParser(object):
            telegram DSMR version (v4 and up).
        :type telegram_specification: dict
        """
        self.telegram_specification = telegram_specification
        self.apply_checksum_validation = apply_checksum_validation
        self.telegram_specification = telegram_specification
        # Regexes are compiled once to improve performance
        self.telegram_specification_regexes = {
            signature: re.compile(signature, re.DOTALL)
            for signature in self.telegram_specification['objects'].keys()
        }
    def parse(self, telegram_data, encryption_key="", authentication_key=""):  # noqa: C901
        """
@ -80,7 +85,7 @@ class TelegramParser(object):
        telegram = Telegram()
        for signature, parser in self.telegram_specification['objects'].items():
-            pattern = re.compile(signature, re.DOTALL)
+            pattern = self.telegram_specification_regexes[signature]
            matches = pattern.findall(telegram_data)
            # Some signatures are optional and may not be present,