Use Python-slugify to generate Freezefile slugs.

This lets me remove a lot of now-unneeded code from the utils, and the
new dependency considerably improves the quality of the generated slugs.
This commit is contained in:
Friedrich Lindenberg 2013-11-15 22:48:38 +02:00
parent 060691f818
commit 65198dc74a
2 changed files with 3 additions and 44 deletions

View File

@ -3,7 +3,8 @@ import logging
import re
import locale
from dataset.util import FreezeException, slug
from dataset.util import FreezeException
from slugify import slugify
TMPL_KEY = re.compile("{{([^}]*)}}")
@ -11,7 +12,7 @@ TMPL_KEY = re.compile("{{([^}]*)}}")
# Maps an operation name to a one-argument callable that transforms a value.
# NOTE(review): presumably looked up for keys matched by TMPL_KEY -- the call
# site is not visible here; confirm against the rest of the module.
OPERATIONS = {
    'identity': lambda x: x,
    'lower': lambda x: unicode(x).lower(),
    # Slug generation is delegated to the imported slugify().
    'slug': slugify,
}

View File

@ -10,45 +10,3 @@ class DatasetException(Exception):
class FreezeException(DatasetException):
    """DatasetException subclass that adds no behaviour of its own.

    NOTE(review): presumably raised for freeze/export failures -- the raise
    sites are not visible here; confirm against callers.
    """
def normalize(text):
    """Simplify a piece of text into a more canonical representation.

    The text is coerced to a Unicode string, lowercased and NFKD-decomposed.
    Each character is then handled by its Unicode major category:

    * ``C`` (control etc.) and ``Z`` (separators such as spaces, newlines,
      non-breaking spaces) become a single space;
    * ``M`` (marks, e.g. the combining diaeresis of an umlaut) and ``S``
      (symbols, e.g. currency signs) are dropped;
    * everything else is kept unchanged.

    Runs of spaces are collapsed to one, leading/trailing spaces are removed
    and the result is returned NFKC-normalized.

    :param text: the value to normalize; non-text values are converted with
        the interpreter's Unicode text type first.
    :returns: the normalized Unicode string (possibly empty).
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: str is already Unicode text
    if not isinstance(text, text_type):
        text = text_type(text)

    filtered = []
    for char in ucnorm('NFKD', text.lower()):
        # category() returns a two-letter code; the first letter is the
        # major class, which is all the filtering below depends on.
        major = category(char)[0]
        if major in 'CZ':
            filtered.append(u' ')
        elif major in 'MS':
            continue
        else:
            filtered.append(char)

    # After filtering, the only whitespace left is the plain space, so
    # splitting on it and dropping empty pieces both collapses runs of
    # spaces and strips the ends -- and, unlike a replace loop, always
    # terminates.
    words = u''.join(filtered).split(u' ')
    text = u' '.join(word for word in words if word)
    return ucnorm('NFKC', text)
def slug(text):
    """Return a lowercase, hyphen-separated form of *text* suitable for
    use in a URL or file name.

    The text is passed through normalize(), the sharp s is expanded to
    'ss', and the result is split on SLUG_REMOVE; the non-empty pieces are
    joined with '-'.
    """
    cleaned = normalize(text).replace(u'ß', 'ss')
    pieces = [piece for piece in SLUG_REMOVE.split(cleaned) if len(piece)]
    return '-'.join(pieces).lower()