From 2f40ef1826c4c4701f9b316cf6549798c72b2b79 Mon Sep 17 00:00:00 2001 From: LecygneNoir Date: Sat, 9 Mar 2019 11:34:57 +0100 Subject: [PATCH] Simplify cleanString function to prepare python3 compatibility --- lib/utils.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/lib/utils.py b/lib/utils.py index 772ff6b..63083b0 100644 --- a/lib/utils.py +++ b/lib/utils.py @@ -3,9 +3,10 @@ from ConfigParser import RawConfigParser, NoOptionError, NoSectionError from os.path import dirname, splitext, basename, isfile +import re from os import devnull from subprocess import check_call, CalledProcessError, STDOUT -import unicodedata +import unidecode import logging ### CATEGORIES ### @@ -195,16 +196,8 @@ def upcaseFirstLetter(s): def cleanString(toclean): - toclean = toclean.split(' ') - cleaned = '' - for s in toclean: - if s == '': - continue - strtoclean = unicodedata.normalize('NFKD', unicode (s, 'utf-8')).encode('ASCII', 'ignore') - strtoclean = ''.join(e for e in strtoclean if e.isalnum()) - if strtoclean == '': - continue - strtoclean = upcaseFirstLetter(strtoclean) - cleaned = cleaned + strtoclean + toclean = toclean.decode('utf-8') + toclean = unidecode.unidecode(toclean) + cleaned = re.sub('[^A-Za-z0-9]+', '', toclean) return cleaned