subzero make_data uses unicode literals. Make it work in both python 2 and 3.

5 years ago · eda8880357
parent ecefa41c93
commit eda8880357
1 changed files with 86 additions and 85 deletions
--- a/libs/subzero/modification/dictionaries/make_data.py
+++ b/libs/subzero/modification/dictionaries/make_data.py
@@ -1,6 +1,7 @@
 # coding=utf-8
-
 from __future__ import absolute_import
+from __future__ import unicode_literals
+
 import re
 import os
 import pprint
@@ -25,95 +26,95 @@ for lang, grps in data.iteritems():
 SZ_FIX_DATA = {
    "eng": {
        "PartialWordsAlways": {
-            u"°x°": u"%",
-            u"compiete": u"complete",
-            u"Âs": u"'s",
-            u"ÃÂs": u"'s",
-            u"a/ion": u"ation",
-            u"at/on": u"ation",
-            u"l/an": u"lian",
-            u"lljust": u"ll just",
-            u" L ": u" I ",
-            u" l ": u" I ",
-            u"'sjust": u"'s just",
-            u"'tjust": u"'t just",
-            u"\";": u"'s",
+            "°x°": "%",
+            "compiete": "complete",
+            "Âs": "'s",
+            "ÃÂs": "'s",
+            "a/ion": "ation",
+            "at/on": "ation",
+            "l/an": "lian",
+            "lljust": "ll just",
+            " L ": " I ",
+            " l ": " I ",
+            "'sjust": "'s just",
+            "'tjust": "'t just",
+            "\";": "'s",
        },
        "WholeWords": {
-            u"I'11": u"I'll",
-            u"III'll": u"I'll",
-            u"Tun": u"Run",
-            u"pan'": u"part",
-            u"al'": u"at",
-            u"a re": u"are",
-            u"wail'": u"wait",
-            u"he)'": u"hey",
-            u"he)\"": u"hey",
-            u"He)'": u"Hey",
-            u"He)\"": u"Hey",
-            u"He)’": u"Hey",
-            u"Yea h": u"Yeah",
-            u"yea h": u"yeah",
-            u"h is": u"his",
-            u" 're ": u"'re ",
-            u"LAst": u"Last",
-            u"forthis": u"for this",
-            u"Ls": u"Is",
-            u"Iam": u"I am",
-            u"Ican": u"I can",
+            "I'11": "I'll",
+            "III'll": "I'll",
+            "Tun": "Run",
+            "pan'": "part",
+            "al'": "at",
+            "a re": "are",
+            "wail'": "wait",
+            "he)'": "hey",
+            "he)\"": "hey",
+            "He)'": "Hey",
+            "He)\"": "Hey",
+            "He)’": "Hey",
+            "Yea h": "Yeah",
+            "yea h": "yeah",
+            "h is": "his",
+            " 're ": "'re ",
+            "LAst": "Last",
+            "forthis": "for this",
+            "Ls": "Is",
+            "Iam": "I am",
+            "Ican": "I can",
        },
        "PartialLines": {
-            u"L know": u"I know",
-            u"L should": u"I should",
-            u"L do": u"I do",
-            u"L would": u"I would",
-            u"L could": u"I could",
-            u"L can": u"I can",
-            u"L happen": u"I happen",
-            u"L might": u"I might",
-            u"L have ": u"I have",
-            u"L had": u"I had",
-            u"L want": u"I want",
-            u"L was": u"I was",
-            u"L am": u"I am",
-            u"L will": u"I will",
-            u"L suggest": u"I suggest",
-            u"L think": u"I think",
-            u"L reckon": u"I reckon",
-            u"L like": u"I like",
-            u"L love": u"I love",
-            u"L don't": u"I don't",
-            u"L didn't": u"I didn't",
-            u"L wasn't": u"I wasnt't",
-            u"L haven't": u"I haven't",
-            u"L couldn't": u"I couldn't",
-            u"L won't": u"I won't",
-            u"H i": u"Hi",
+            "L know": "I know",
+            "L should": "I should",
+            "L do": "I do",
+            "L would": "I would",
+            "L could": "I could",
+            "L can": "I can",
+            "L happen": "I happen",
+            "L might": "I might",
+            "L have ": "I have",
+            "L had": "I had",
+            "L want": "I want",
+            "L was": "I was",
+            "L am": "I am",
+            "L will": "I will",
+            "L suggest": "I suggest",
+            "L think": "I think",
+            "L reckon": "I reckon",
+            "L like": "I like",
+            "L love": "I love",
+            "L don't": "I don't",
+            "L didn't": "I didn't",
+            "L wasn't": "I wasnt't",
+            "L haven't": "I haven't",
+            "L couldn't": "I couldn't",
+            "L won't": "I won't",
+            "H i": "Hi",
        },
        "BeginLines": {
-            u"l ": u"I ",
-            u"L ": u"I ",
+            "l ": "I ",
+            "L ": "I ",
        }
    },
    "nld": {
        "PartialWordsAlways": {
-            u"ט": u"è",
-            u"י": u"é",
-            u"כ": u"ë",
-            u"צ": u"ë",
-            u"ן": u"ï",
-            u"ף": u"ó",
-            u"א": u"à",
-            u"Iֻ": u"I",
-            u"č": u"è",
-            u"פ": u"o",
-            u"ם": u"i",
+            "ט": "è",
+            "י": "é",
+            "כ": "ë",
+            "צ": "ë",
+            "ן": "ï",
+            "ף": "ó",
+            "א": "à",
+            "Iֻ": "I",
+            "č": "è",
+            "פ": "o",
+            "ם": "i",
        },
    },
    "swe": {
        "PartialWordsAlways": {
-            u"ĺ": u"å",
-            u"Ĺ": u"Å",
+            "ĺ": "å",
+            "Ĺ": "Å",
        }
    }
 }
@@ -136,16 +137,16 @@ if __name__ == "__main__":
            fetch_data = (
                    # group, item_name, pattern
                    ("WholeLines", "Line", None),
-                    ("WholeWords", "Word", lambda d: (ur"(?um)(\b|^)(?:" + u"|".join([re.escape(k) for k in d.keys()])
-                                                      + ur')(\b|$)') if d else None),
+                    ("WholeWords", "Word", lambda d: (r"(?um)(\b|^)(?:" + "|".join([re.escape(k) for k in list(d.keys())])
+                                                      + r')(\b|$)') if d else None),
                    ("PartialWordsAlways", "WordPart", None),
-                    ("PartialLines", "LinePart", lambda d: (ur"(?um)(?:(?<=\s)|(?<=^)|(?<=\b))(?:" +
-                                                            u"|".join([re.escape(k) for k in d.keys()]) +
-                                                            ur")(?:(?=\s)|(?=$)|(?=\b))") if d else None),
-                    ("BeginLines", "Beginning", lambda d: (ur"(?um)^(?:"+u"|".join([re.escape(k) for k in d.keys()])
-                                                           + ur')') if d else None),
-                    ("EndLines", "Ending", lambda d: (ur"(?um)(?:" + u"|".join([re.escape(k) for k in d.keys()]) +
-                                                      ur")$") if d else None,),
+                    ("PartialLines", "LinePart", lambda d: (r"(?um)(?:(?<=\s)|(?<=^)|(?<=\b))(?:" +
+                                                            "|".join([re.escape(k) for k in list(d.keys())]) +
+                                                            r")(?:(?=\s)|(?=$)|(?=\b))") if d else None),
+                    ("BeginLines", "Beginning", lambda d: (r"(?um)^(?:"+"|".join([re.escape(k) for k in list(d.keys())])
+                                                           + r')') if d else None),
+                    ("EndLines", "Ending", lambda d: (r"(?um)(?:" + "|".join([re.escape(k) for k in list(d.keys())]) +
+                                                      r")$") if d else None,),
            )

            data[lang] = dict((grp, {"data": OrderedDict(), "pattern": None}) for grp, item_name, pattern in fetch_data)