Improve parsing speed by about 6% by caching compiled regexes (#131)
* Improve parsing speed by about 6% by caching compiled regexes
This commit is contained in:
		
							parent
							
								
									9847bdf66b
								
							
						
					
					
						commit
						5a59c36646
					
				| @ -1,5 +1,13 @@ | ||||
| import re | ||||
| 
 | ||||
| # - Match all characters after start of telegram except for the start | ||||
| # itself again '[^\/]+', which eliminates incomplete preceding telegrams. | ||||
| # - Do non-greedy match using '?' so start is matched up to the first | ||||
| # checksum that's found. | ||||
| # - The checksum is optional '{0,4}' because not all telegram versions | ||||
| # support it. | ||||
| _FIND_TELEGRAMS_REGEX = re.compile(r"\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n", re.DOTALL) | ||||
| 
 | ||||
| 
 | ||||
| class TelegramBuffer(object): | ||||
|     """ | ||||
| @ -8,14 +16,14 @@ class TelegramBuffer(object): | ||||
|     """ | ||||
| 
 | ||||
|     def __init__(self): | ||||
|         self._buffer = '' | ||||
|         self._buffer = "" | ||||
| 
 | ||||
|     def get_all(self): | ||||
|         """ | ||||
|         Remove complete telegrams from buffer and yield them. | ||||
|         :rtype generator: | ||||
|         """ | ||||
|         for telegram in self._find_telegrams(): | ||||
|         for telegram in _FIND_TELEGRAMS_REGEX.findall(self._buffer): | ||||
|             self._remove(telegram) | ||||
|             yield telegram | ||||
| 
 | ||||
| @ -37,21 +45,3 @@ class TelegramBuffer(object): | ||||
|         index = self._buffer.index(telegram) + len(telegram) | ||||
| 
 | ||||
|         self._buffer = self._buffer[index:] | ||||
| 
 | ||||
|     def _find_telegrams(self): | ||||
|         """ | ||||
|         Find complete telegrams in buffer from  start ('/') till ending | ||||
|         checksum ('!AB12\r\n'). | ||||
|         :rtype: list | ||||
|         """ | ||||
|         # - Match all characters after start of telegram except for the start | ||||
|         # itself again '[^\/]+', which eliminates incomplete preceding telegrams. | ||||
|         # - Do non-greedy match using '?' so start is matched up to the first | ||||
|         # checksum that's found. | ||||
|         # - The checksum is optional '{0,4}' because not all telegram versions | ||||
|         # support it. | ||||
|         return re.findall( | ||||
|             r'\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n', | ||||
|             self._buffer, | ||||
|             re.DOTALL | ||||
|         ) | ||||
|  | ||||
| @ -25,8 +25,13 @@ class TelegramParser(object): | ||||
|             telegram DSMR version (v4 and up). | ||||
|         :type telegram_specification: dict | ||||
|         """ | ||||
|         self.telegram_specification = telegram_specification | ||||
|         self.apply_checksum_validation = apply_checksum_validation | ||||
|         self.telegram_specification = telegram_specification | ||||
|         # Regexes are compiled once to improve performance | ||||
|         self.telegram_specification_regexes = { | ||||
|             signature: re.compile(signature, re.DOTALL) | ||||
|             for signature in self.telegram_specification['objects'].keys() | ||||
|         } | ||||
| 
 | ||||
|     def parse(self, telegram_data, encryption_key="", authentication_key=""):  # noqa: C901 | ||||
|         """ | ||||
| @ -80,7 +85,7 @@ class TelegramParser(object): | ||||
|         telegram = Telegram() | ||||
| 
 | ||||
|         for signature, parser in self.telegram_specification['objects'].items(): | ||||
|             pattern = re.compile(signature, re.DOTALL) | ||||
|             pattern = self.telegram_specification_regexes[signature] | ||||
|             matches = pattern.findall(telegram_data) | ||||
| 
 | ||||
|             # Some signatures are optional and may not be present, | ||||
|  | ||||
		Loading…
	
		Reference in New Issue
	
	Block a user