Simplify cleanString function to prepare for Python 3 compatibility

5 years ago · 2f40ef1826
--- a/lib/utils.py
+++ b/lib/utils.py
@@ -3,9 +3,10 @@

 from ConfigParser import RawConfigParser, NoOptionError, NoSectionError
 from os.path import dirname, splitext, basename, isfile
 import re
 from os import devnull
 from subprocess import check_call, CalledProcessError, STDOUT
 import unicodedata
 import unidecode
 import logging

 ### CATEGORIES ###
@@ -195,16 +196,8 @@ def upcaseFirstLetter(s):


 def cleanString(toclean):
    """Reduce a string to its ASCII letters and digits.

    The input is transliterated to ASCII with ``unidecode`` and every
    character outside ``[A-Za-z0-9]`` (spaces, punctuation, symbols) is
    then removed.

    :param toclean: text to clean; byte strings are decoded as UTF-8.
    :return: the cleaned string (possibly empty).
    :raises UnicodeDecodeError: if a byte string is not valid UTF-8.
    """
    # Decode only byte strings: on Python 3 ``str`` has no ``decode``, and on
    # Python 2 calling ``decode`` on ``unicode`` triggers an implicit ASCII
    # encode that fails for non-ASCII text.
    if isinstance(toclean, bytes):
        toclean = toclean.decode('utf-8')
    toclean = unidecode.unidecode(toclean)
    cleaned = re.sub('[^A-Za-z0-9]+', '', toclean)

    return cleaned