All Downloads are FREE. Search and download functionalities are using the official Maven repository.

te.recipe.rewrite-ai-search.0.19.0.source-code.spellcheck_comment_french.py Maven / Gradle / Ivy

There is a newer version: 0.21.0
Show newest version
#
# Copyright 2021 the original author or authors.
# 

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#

# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Small Gradio service that repairs French comments whose accented characters
# were replaced by "?" or "�": each damaged word is looked up in a SymSpell
# frequency dictionary and replaced by the closest accented candidate.
import re

import gradio as gr  # 3.23.0
from symspellpy import SymSpell, Verbosity

# Accented French characters. A dictionary suggestion is only accepted when it
# contains at least one of them; otherwise the original token is kept.
accents = ['à', 'â', 'æ', 'ç', 'é', 'è', 'ê', 'ë', 'î', 'ï', 'ô', 'œ', 'ù', 'û', 'ü', 'ÿ',
           'À', 'Â', 'Æ', 'Ç', 'É', 'È', 'Ê', 'Ë', 'Î', 'Ï', 'Ô', 'Œ', 'Ù', 'Û', 'Ü', 'Ÿ']

# Edit distance the dictionary is built for. Lookups are clamped to this value
# because SymSpell.lookup rejects a larger max_edit_distance.
_MAX_EDIT_DISTANCE = 2

# Group 1: the word, optionally ending in one damaged character ("?" or "�").
# Group 2: the trailing run of characters that are neither ASCII letters nor
# damaged characters (punctuation, digits, ...).
_WORD_SUFFIX_RE = re.compile(r"^(.*?[?�]?)([^a-zA-Z?�]*)$")


def split_word(word):
    """Split a token into ``(word, suffix)``.

    ``suffix`` is the trailing run of characters that are neither ASCII
    letters nor ``?``/``�``; ``word`` is everything before it. If the pattern
    does not match, the token is returned unchanged with an empty suffix.
    """
    match = _WORD_SUFFIX_RE.match(word)
    if match:
        return match.group(1), match.group(2)
    return word, ''


path = "/app/fr-1M.txt"
sym_spell = SymSpell(max_dictionary_edit_distance=_MAX_EDIT_DISTANCE, prefix_length=7)
if not sym_spell.load_dictionary(path, term_index=1, count_index=2, separator="\t"):
    raise Exception("Couldn't find the dictionnary.")


def _fix_token(token, next_token):
    """Return the repaired form of one space-separated token.

    ``next_token`` is the token that follows on the same line, or ``None``
    when ``token`` is the last one. Tokens that cannot be repaired are
    returned exactly as they were given, punctuation included.
    """
    word, suffix = split_word(token)
    if word[-2:] == "??":
        # Treat the second "?" as punctuation rather than a damaged character.
        word = word[:-1]
        suffix = "?" + suffix
    # word + suffix always reassembles the original token.
    unchanged = word + suffix

    damaged = word.count("?") + word.count("�")
    if not damaged:
        return unchanged

    # A lone "?" at the end of the line or before a capitalised word is kept
    # as a question mark. Slicing with [:1] tolerates an empty next token,
    # which a double space produces.
    if word == "?" and (next_token is None or next_token[:1].isupper()):
        return unchanged

    suggestions = sym_spell.lookup(
        word,
        Verbosity.CLOSEST,
        max_edit_distance=min(damaged, _MAX_EDIT_DISTANCE),
    )
    if not suggestions:
        return unchanged

    fixed_word = suggestions[0].term
    if word[-1] == "?" and fixed_word[-1] not in accents:
        if word[:-1].lower() == fixed_word.lower():
            # The suggestion is just the word without its "?", so the "?" was
            # a real question mark and nothing needs repairing.
            return unchanged
        # The trailing "?" was punctuation dropped by the lookup: restore it.
        fixed_word += "?"

    if not any(accent in fixed_word for accent in accents):
        # A suggestion without accents cannot be the repair we are after.
        return unchanged
    return fixed_word + suffix


def _fix_single_line(comment):
    """Repair every space-separated token of a single line."""
    words = comment.split(" ")
    last = len(words) - 1
    return " ".join(
        _fix_token(token, words[index + 1] if index < last else None)
        for index, token in enumerate(words)
    )


def fix_comment_french(comments):
    """Repair damaged accented characters in French comment text.

    Args:
        comments: Text of one or more lines separated by ``"\\n"``.

    Returns:
        The text with the same line and space layout, where each word
        containing ``?`` or ``�`` has been replaced by the closest accented
        dictionary entry when one is found.
    """
    return "\n".join(_fix_single_line(line) for line in comments.split("\n"))


gr.Interface(fn=fix_comment_french, inputs=["text"], outputs="text").launch(server_port=7866)





© 2015 - 2024 Weber Informatics LLC | Privacy Policy