From 981bf85cf0e03e79dfc722bb65ca496824c5ddbb Mon Sep 17 00:00:00 2001 From: Chrys Date: Tue, 22 Oct 2019 00:45:38 +0200 Subject: [PATCH] add regex; --- config/punctuation/de.conf | 43 ++++++++++--------- config/punctuation/default.conf | 43 +++++++++---------- config/punctuation/en.conf | 43 ++++++++++--------- config/punctuation/es.conf | 43 ++++++++++--------- config/punctuation/fr.conf | 43 ++++++++++--------- config/punctuation/pl.conf | 43 ++++++++++--------- .../core/punctuationManager.py | 9 ++-- 7 files changed, 137 insertions(+), 130 deletions(-) diff --git a/config/punctuation/de.conf b/config/punctuation/de.conf index 81f33903..cebc1805 100644 --- a/config/punctuation/de.conf +++ b/config/punctuation/de.conf @@ -44,29 +44,30 @@ _:===:Lienie unten =:===:Istgleich [customDict] + [emoticonDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. -[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves -[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses -[\s*|'|"][\s*|'|"]:-/:===:confused -[\s*|'|"][\s*|'|"]-\.-:===:bugged -[\s*|'|"][\s*|'|"]>\.<:===:laughing -[\s*|'|"][\s*|'|"]8-X:===:skull -[\s*|'|"][\s*|'|"]>:\):===:evil smile -[\s*|'|"][\s*|'|"]>:-\):===:evil smile -[\s*|'|"][\s*|'|"]\\o/:===:Hurray -[\s*|'|"][\s*|'|"]:/:===:confused -[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses -[\s*|'|"]:D[\s*|'|"]:===:laugh -[\s*|'|"];\)[\s*|'|"]:===:wink -[\s*|'|"]XD[\s*|'|"]:===:LOL -[\s*|'|"]:-\)[\s*|'|"]:===:smile -[\s*|'|"]:\)[\s*|'|"]:===:smile -[\s*|'|"]->[\s*|'|"]:===:arrow right +# This dictionary uses regexp when prefixed with "regex;", so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. +regex;[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves +regex;[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses +regex;[\s*|'|"][\s*|'|"]:-/:===:confused +regex;[\s*|'|"][\s*|'|"]-\.-:===:bugged +regex;[\s*|'|"][\s*|'|"]>\.<:===:laughing +regex;[\s*|'|"][\s*|'|"]8-X:===:skull +regex;[\s*|'|"][\s*|'|"]>:\):===:evil smile +regex;[\s*|'|"][\s*|'|"]>:-\):===:evil smile +regex;[\s*|'|"][\s*|'|"]\\o/:===:Hurray +regex;[\s*|'|"][\s*|'|"]:/:===:confused +regex;[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses +regex;[\s*|'|"]:D[\s*|'|"]:===:laugh +regex;[\s*|'|"];\)[\s*|'|"]:===:wink +regex;[\s*|'|"]XD[\s*|'|"]:===:LOL +regex;[\s*|'|"]:-\)[\s*|'|"]:===:smile +regex;[\s*|'|"]:\)[\s*|'|"]:===:smile +regex;[\s*|'|"]->[\s*|'|"]:===:arrow right # example for arrow left #(?:[ |^])(<-)(?:[ ,.!?$]):===:arrow left # or #([ |^])<-([ ,.!?$]):===:arrow left\2 -[\s*|'|"]<-[\s*|'|"]:===:arrow left -[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? -[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile +regex;[\s*|'|"]<-[\s*|'|"]:===:arrow left +regex;[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? +regex;[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile diff --git a/config/punctuation/default.conf b/config/punctuation/default.conf index 22cdc00e..69cf5128 100644 --- a/config/punctuation/default.conf +++ b/config/punctuation/default.conf @@ -44,31 +44,30 @@ _:===:line =:===:equals [customDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. [emoticonDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. -[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves -[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses -[\s*|'|"][\s*|'|"]:-/:===:confused -[\s*|'|"][\s*|'|"]-\.-:===:bugged -[\s*|'|"][\s*|'|"]>\.<:===:laughing -[\s*|'|"][\s*|'|"]8-X:===:skull -[\s*|'|"][\s*|'|"]>:\):===:evil smile -[\s*|'|"][\s*|'|"]>:-\):===:evil smile -[\s*|'|"][\s*|'|"]\\o/:===:Hurray -[\s*|'|"][\s*|'|"]:/:===:confused -[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses -[\s*|'|"]:D[\s*|'|"]:===:laugh -[\s*|'|"];\)[\s*|'|"]:===:wink -[\s*|'|"]XD[\s*|'|"]:===:LOL -[\s*|'|"]:-\)[\s*|'|"]:===:smile -[\s*|'|"]:\)[\s*|'|"]:===:smile -[\s*|'|"]->[\s*|'|"]:===:arrow right +# This dictionary uses regexp when prefixed with "regex;", so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. +regex;[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves +regex;[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses +regex;[\s*|'|"][\s*|'|"]:-/:===:confused +regex;[\s*|'|"][\s*|'|"]-\.-:===:bugged +regex;[\s*|'|"][\s*|'|"]>\.<:===:laughing +regex;[\s*|'|"][\s*|'|"]8-X:===:skull +regex;[\s*|'|"][\s*|'|"]>:\):===:evil smile +regex;[\s*|'|"][\s*|'|"]>:-\):===:evil smile +regex;[\s*|'|"][\s*|'|"]\\o/:===:Hurray +regex;[\s*|'|"][\s*|'|"]:/:===:confused +regex;[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses +regex;[\s*|'|"]:D[\s*|'|"]:===:laugh +regex;[\s*|'|"];\)[\s*|'|"]:===:wink +regex;[\s*|'|"]XD[\s*|'|"]:===:LOL +regex;[\s*|'|"]:-\)[\s*|'|"]:===:smile +regex;[\s*|'|"]:\)[\s*|'|"]:===:smile +regex;[\s*|'|"]->[\s*|'|"]:===:arrow right # example for arrow left #(?:[ |^])(<-)(?:[ ,.!?$]):===:arrow left # or #([ |^])<-([ ,.!?$]):===:arrow left\2 -[\s*|'|"]<-[\s*|'|"]:===:arrow left -[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? -[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile +regex;[\s*|'|"]<-[\s*|'|"]:===:arrow left +regex;[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? +regex;[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile diff --git a/config/punctuation/en.conf b/config/punctuation/en.conf index 005a7bb7..686ce536 100644 --- a/config/punctuation/en.conf +++ b/config/punctuation/en.conf @@ -44,29 +44,30 @@ _:===:line =:===:equals [customDict] + [emoticonDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. -[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves -[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses -[\s*|'|"][\s*|'|"]:-/:===:confused -[\s*|'|"][\s*|'|"]-\.-:===:bugged -[\s*|'|"][\s*|'|"]>\.<:===:laughing -[\s*|'|"][\s*|'|"]8-X:===:skull -[\s*|'|"][\s*|'|"]>:\):===:evil smile -[\s*|'|"][\s*|'|"]>:-\):===:evil smile -[\s*|'|"][\s*|'|"]\\o/:===:Hurray -[\s*|'|"][\s*|'|"]:/:===:confused -[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses -[\s*|'|"]:D[\s*|'|"]:===:laugh -[\s*|'|"];\)[\s*|'|"]:===:wink -[\s*|'|"]XD[\s*|'|"]:===:LOL -[\s*|'|"]:-\)[\s*|'|"]:===:smile -[\s*|'|"]:\)[\s*|'|"]:===:smile -[\s*|'|"]->[\s*|'|"]:===:arrow right +# This dictionary uses regexp when prefixed with "regex;", so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. +regex;[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves +regex;[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses +regex;[\s*|'|"][\s*|'|"]:-/:===:confused +regex;[\s*|'|"][\s*|'|"]-\.-:===:bugged +regex;[\s*|'|"][\s*|'|"]>\.<:===:laughing +regex;[\s*|'|"][\s*|'|"]8-X:===:skull +regex;[\s*|'|"][\s*|'|"]>:\):===:evil smile +regex;[\s*|'|"][\s*|'|"]>:-\):===:evil smile +regex;[\s*|'|"][\s*|'|"]\\o/:===:Hurray +regex;[\s*|'|"][\s*|'|"]:/:===:confused +regex;[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses +regex;[\s*|'|"]:D[\s*|'|"]:===:laugh +regex;[\s*|'|"];\)[\s*|'|"]:===:wink +regex;[\s*|'|"]XD[\s*|'|"]:===:LOL +regex;[\s*|'|"]:-\)[\s*|'|"]:===:smile +regex;[\s*|'|"]:\)[\s*|'|"]:===:smile +regex;[\s*|'|"]->[\s*|'|"]:===:arrow right # example for arrow left #(?:[ |^])(<-)(?:[ ,.!?$]):===:arrow left # or #([ |^])<-([ ,.!?$]):===:arrow left\2 -[\s*|'|"]<-[\s*|'|"]:===:arrow left -[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? -[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile +regex;[\s*|'|"]<-[\s*|'|"]:===:arrow left +regex;[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? +regex;[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile diff --git a/config/punctuation/es.conf b/config/punctuation/es.conf index 8bfe9612..03325679 100644 --- a/config/punctuation/es.conf +++ b/config/punctuation/es.conf @@ -44,29 +44,30 @@ _:===:subrayado =:===:igual [customDict] + [emoticonDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. -[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves -[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses -[\s*|'|"][\s*|'|"]:-/:===:confused -[\s*|'|"][\s*|'|"]-\.-:===:bugged -[\s*|'|"][\s*|'|"]>\.<:===:laughing -[\s*|'|"][\s*|'|"]8-X:===:skull -[\s*|'|"][\s*|'|"]>:\):===:evil smile -[\s*|'|"][\s*|'|"]>:-\):===:evil smile -[\s*|'|"][\s*|'|"]\\o/:===:Hurray -[\s*|'|"][\s*|'|"]:/:===:confused -[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses -[\s*|'|"]:D[\s*|'|"]:===:laugh -[\s*|'|"];\)[\s*|'|"]:===:wink -[\s*|'|"]XD[\s*|'|"]:===:LOL -[\s*|'|"]:-\)[\s*|'|"]:===:smile -[\s*|'|"]:\)[\s*|'|"]:===:smile -[\s*|'|"]->[\s*|'|"]:===:arrow right +# This dictionary uses regexp when prefixed with "regex;", so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. +regex;[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves +regex;[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses +regex;[\s*|'|"][\s*|'|"]:-/:===:confused +regex;[\s*|'|"][\s*|'|"]-\.-:===:bugged +regex;[\s*|'|"][\s*|'|"]>\.<:===:laughing +regex;[\s*|'|"][\s*|'|"]8-X:===:skull +regex;[\s*|'|"][\s*|'|"]>:\):===:evil smile +regex;[\s*|'|"][\s*|'|"]>:-\):===:evil smile +regex;[\s*|'|"][\s*|'|"]\\o/:===:Hurray +regex;[\s*|'|"][\s*|'|"]:/:===:confused +regex;[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses +regex;[\s*|'|"]:D[\s*|'|"]:===:laugh +regex;[\s*|'|"];\)[\s*|'|"]:===:wink +regex;[\s*|'|"]XD[\s*|'|"]:===:LOL +regex;[\s*|'|"]:-\)[\s*|'|"]:===:smile +regex;[\s*|'|"]:\)[\s*|'|"]:===:smile +regex;[\s*|'|"]->[\s*|'|"]:===:arrow right # example for arrow left #(?:[ |^])(<-)(?:[ ,.!?$]):===:arrow left # or #([ |^])<-([ ,.!?$]):===:arrow left\2 -[\s*|'|"]<-[\s*|'|"]:===:arrow left -[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? -[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile +regex;[\s*|'|"]<-[\s*|'|"]:===:arrow left +regex;[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? +regex;[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile diff --git a/config/punctuation/fr.conf b/config/punctuation/fr.conf index 475db2be..7bff0ed1 100644 --- a/config/punctuation/fr.conf +++ b/config/punctuation/fr.conf @@ -44,29 +44,30 @@ _:===:souligné =:===:égale à [customDict] + [emoticonDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. -[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves -[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses -[\s*|'|"][\s*|'|"]:-/:===:confused -[\s*|'|"][\s*|'|"]-\.-:===:bugged -[\s*|'|"][\s*|'|"]>\.<:===:laughing -[\s*|'|"][\s*|'|"]8-X:===:skull -[\s*|'|"][\s*|'|"]>:\):===:evil smile -[\s*|'|"][\s*|'|"]>:-\):===:evil smile -[\s*|'|"][\s*|'|"]\\o/:===:Hurray -[\s*|'|"][\s*|'|"]:/:===:confused -[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses -[\s*|'|"]:D[\s*|'|"]:===:laugh -[\s*|'|"];\)[\s*|'|"]:===:wink -[\s*|'|"]XD[\s*|'|"]:===:LOL -[\s*|'|"]:-\)[\s*|'|"]:===:smile -[\s*|'|"]:\)[\s*|'|"]:===:smile -[\s*|'|"]->[\s*|'|"]:===:arrow right +# This dictionary uses regexp when prefixed with "regex;", so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. +regex;[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves +regex;[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses +regex;[\s*|'|"][\s*|'|"]:-/:===:confused +regex;[\s*|'|"][\s*|'|"]-\.-:===:bugged +regex;[\s*|'|"][\s*|'|"]>\.<:===:laughing +regex;[\s*|'|"][\s*|'|"]8-X:===:skull +regex;[\s*|'|"][\s*|'|"]>:\):===:evil smile +regex;[\s*|'|"][\s*|'|"]>:-\):===:evil smile +regex;[\s*|'|"][\s*|'|"]\\o/:===:Hurray +regex;[\s*|'|"][\s*|'|"]:/:===:confused +regex;[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses +regex;[\s*|'|"]:D[\s*|'|"]:===:laugh +regex;[\s*|'|"];\)[\s*|'|"]:===:wink +regex;[\s*|'|"]XD[\s*|'|"]:===:LOL +regex;[\s*|'|"]:-\)[\s*|'|"]:===:smile +regex;[\s*|'|"]:\)[\s*|'|"]:===:smile +regex;[\s*|'|"]->[\s*|'|"]:===:arrow right # example for arrow left #(?:[ |^])(<-)(?:[ ,.!?$]):===:arrow left # or #([ |^])<-([ ,.!?$]):===:arrow left\2 -[\s*|'|"]<-[\s*|'|"]:===:arrow left -[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? -[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile +regex;[\s*|'|"]<-[\s*|'|"]:===:arrow left +regex;[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? +regex;[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile diff --git a/config/punctuation/pl.conf b/config/punctuation/pl.conf index 75152d0c..3cec60c5 100644 --- a/config/punctuation/pl.conf +++ b/config/punctuation/pl.conf @@ -44,29 +44,30 @@ _:===:podkreślnik =:===:równa się [customDict] + [emoticonDict] -# This dictionary uses regexp, so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. -[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves -[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses -[\s*|'|"][\s*|'|"]:-/:===:confused -[\s*|'|"][\s*|'|"]-\.-:===:bugged -[\s*|'|"][\s*|'|"]>\.<:===:laughing -[\s*|'|"][\s*|'|"]8-X:===:skull -[\s*|'|"][\s*|'|"]>:\):===:evil smile -[\s*|'|"][\s*|'|"]>:-\):===:evil smile -[\s*|'|"][\s*|'|"]\\o/:===:Hurray -[\s*|'|"][\s*|'|"]:/:===:confused -[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses -[\s*|'|"]:D[\s*|'|"]:===:laugh -[\s*|'|"];\)[\s*|'|"]:===:wink -[\s*|'|"]XD[\s*|'|"]:===:LOL -[\s*|'|"]:-\)[\s*|'|"]:===:smile -[\s*|'|"]:\)[\s*|'|"]:===:smile -[\s*|'|"]->[\s*|'|"]:===:arrow right +# This dictionary uses regexp when prefixed with "regex;", so be sure to escape anything that would be parsed by regexp, e.g. *, ., ^, $, etc. +regex;[\s*|'|"][\s*|'|"]<{-.-}>:===:Raves +regex;[\s*|'|"][\s*|'|"]8-\):===:smile with sunglasses +regex;[\s*|'|"][\s*|'|"]:-/:===:confused +regex;[\s*|'|"][\s*|'|"]-\.-:===:bugged +regex;[\s*|'|"][\s*|'|"]>\.<:===:laughing +regex;[\s*|'|"][\s*|'|"]8-X:===:skull +regex;[\s*|'|"][\s*|'|"]>:\):===:evil smile +regex;[\s*|'|"][\s*|'|"]>:-\):===:evil smile +regex;[\s*|'|"][\s*|'|"]\\o/:===:Hurray +regex;[\s*|'|"][\s*|'|"]:/:===:confused +regex;[\s*|'|"][\s*|'|"]8\):===:smile with sunglasses +regex;[\s*|'|"]:D[\s*|'|"]:===:laugh +regex;[\s*|'|"];\)[\s*|'|"]:===:wink +regex;[\s*|'|"]XD[\s*|'|"]:===:LOL +regex;[\s*|'|"]:-\)[\s*|'|"]:===:smile +regex;[\s*|'|"]:\)[\s*|'|"]:===:smile +regex;[\s*|'|"]->[\s*|'|"]:===:arrow right # example for arrow left #(?:[ |^])(<-)(?:[ ,.!?$]):===:arrow left # or #([ |^])<-([ ,.!?$]):===:arrow left\2 -[\s*|'|"]<-[\s*|'|"]:===:arrow left -[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? -[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile +regex;[\s*|'|"]<-[\s*|'|"]:===:arrow left +regex;[\s*|'|"][O|o][O|o][\s*|'|"]:===:WTF? +regex;[\s*|'|"]\^\^[\s*|'|"]:===:enjoy smile diff --git a/src/fenrirscreenreader/core/punctuationManager.py b/src/fenrirscreenreader/core/punctuationManager.py index 57416204..9e3b28a5 100644 --- a/src/fenrirscreenreader/core/punctuationManager.py +++ b/src/fenrirscreenreader/core/punctuationManager.py @@ -39,9 +39,12 @@ class punctuationManager(): if customDict: for key,item in customDict.items(): try: - resultText = re.sub(str(key), seperator + str(item) + seperator, resultText) - except: - resultText = resultText.replace(str(key),seperator + str(item) + seperator) + if item.upper().startswith('REGEX;') and (len(item) > 6): + resultText = re.sub(str(key), seperator + str(item[6:]) + seperator, resultText) + else: + resultText = resultText.replace(str(key),seperator + str(item) + seperator) + except Exception as e: + self.env['runtime']['debug'].writeDebugOut("useCustomDict replace:'" + key + "' with '" + item +"' failed:" + str(e),debug.debugLevel.ERROR, onAnyLevel=False) return resultText def usePunctuationDict(self, text, punctuationDict, punctuation): resultText = str(text)