Improve parsing speed by about 6% by caching compiled regexes (#131)
* Improve parsing speed by about 6% by caching compiled regexes
This commit is contained in:
		
							parent
							
								
									9847bdf66b
								
							
						
					
					
						commit
						5a59c36646
					
				| @ -1,5 +1,13 @@ | |||||||
| import re | import re | ||||||
| 
 | 
 | ||||||
|  | # - Match all characters after start of telegram except for the start | ||||||
|  | # itself again '[^\/]+', which eliminates incomplete preceding telegrams. | ||||||
|  | # - Do non-greedy match using '?' so start is matched up to the first | ||||||
|  | # checksum that's found. | ||||||
|  | # - The checksum is optional '{0,4}' because not all telegram versions | ||||||
|  | # support it. | ||||||
|  | _FIND_TELEGRAMS_REGEX = re.compile(r"\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n", re.DOTALL) | ||||||
|  | 
 | ||||||
| 
 | 
 | ||||||
| class TelegramBuffer(object): | class TelegramBuffer(object): | ||||||
|     """ |     """ | ||||||
| @ -8,14 +16,14 @@ class TelegramBuffer(object): | |||||||
|     """ |     """ | ||||||
| 
 | 
 | ||||||
|     def __init__(self): |     def __init__(self): | ||||||
|         self._buffer = '' |         self._buffer = "" | ||||||
| 
 | 
 | ||||||
|     def get_all(self): |     def get_all(self): | ||||||
|         """ |         """ | ||||||
|         Remove complete telegrams from buffer and yield them. |         Remove complete telegrams from buffer and yield them. | ||||||
|         :rtype generator: |         :rtype generator: | ||||||
|         """ |         """ | ||||||
|         for telegram in self._find_telegrams(): |         for telegram in _FIND_TELEGRAMS_REGEX.findall(self._buffer): | ||||||
|             self._remove(telegram) |             self._remove(telegram) | ||||||
|             yield telegram |             yield telegram | ||||||
| 
 | 
 | ||||||
| @ -37,21 +45,3 @@ class TelegramBuffer(object): | |||||||
|         index = self._buffer.index(telegram) + len(telegram) |         index = self._buffer.index(telegram) + len(telegram) | ||||||
| 
 | 
 | ||||||
|         self._buffer = self._buffer[index:] |         self._buffer = self._buffer[index:] | ||||||
| 
 |  | ||||||
|     def _find_telegrams(self): |  | ||||||
|         """ |  | ||||||
|         Find complete telegrams in buffer from  start ('/') till ending |  | ||||||
|         checksum ('!AB12\r\n'). |  | ||||||
|         :rtype: list |  | ||||||
|         """ |  | ||||||
|         # - Match all characters after start of telegram except for the start |  | ||||||
|         # itself again '^\/]+', which eliminates incomplete preceding telegrams. |  | ||||||
|         # - Do non greedy match using '?' so start is matched up to the first |  | ||||||
|         # checksum that's found. |  | ||||||
|         # - The checksum is optional '{0,4}' because not all telegram versions |  | ||||||
|         # support it. |  | ||||||
|         return re.findall( |  | ||||||
|             r'\/[^\/]+?\![A-F0-9]{0,4}\0?\r\n', |  | ||||||
|             self._buffer, |  | ||||||
|             re.DOTALL |  | ||||||
|         ) |  | ||||||
|  | |||||||
| @ -25,8 +25,13 @@ class TelegramParser(object): | |||||||
|             telegram DSMR version (v4 and up). |             telegram DSMR version (v4 and up). | ||||||
|         :type telegram_specification: dict |         :type telegram_specification: dict | ||||||
|         """ |         """ | ||||||
|         self.telegram_specification = telegram_specification |  | ||||||
|         self.apply_checksum_validation = apply_checksum_validation |         self.apply_checksum_validation = apply_checksum_validation | ||||||
|  |         self.telegram_specification = telegram_specification | ||||||
|  |         # Regexes are compiled once to improve performance | ||||||
|  |         self.telegram_specification_regexes = { | ||||||
|  |             signature: re.compile(signature, re.DOTALL) | ||||||
|  |             for signature in self.telegram_specification['objects'].keys() | ||||||
|  |         } | ||||||
| 
 | 
 | ||||||
|     def parse(self, telegram_data, encryption_key="", authentication_key=""):  # noqa: C901 |     def parse(self, telegram_data, encryption_key="", authentication_key=""):  # noqa: C901 | ||||||
|         """ |         """ | ||||||
| @ -80,7 +85,7 @@ class TelegramParser(object): | |||||||
|         telegram = Telegram() |         telegram = Telegram() | ||||||
| 
 | 
 | ||||||
|         for signature, parser in self.telegram_specification['objects'].items(): |         for signature, parser in self.telegram_specification['objects'].items(): | ||||||
|             pattern = re.compile(signature, re.DOTALL) |             pattern = self.telegram_specification_regexes[signature] | ||||||
|             matches = pattern.findall(telegram_data) |             matches = pattern.findall(telegram_data) | ||||||
| 
 | 
 | ||||||
|             # Some signatures are optional and may not be present, |             # Some signatures are optional and may not be present, | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user