Source code for scripts.replace_tokens_by_dict

import argparse
import os
from glob import glob

# Program description passed to argparse.ArgumentParser as the help text.
desc = """Replaces the tokens according to a dictionary."""


def read_dictionary_from_file(filename):
    """Read a token replacement dictionary from a UTF-8 text file.

    Each non-blank line must contain at least two whitespace-separated
    fields: the original text followed by its replacement. Any further
    fields on the line are ignored. Blank lines are skipped.

    For every entry, a second entry is stored in which both the original
    and the replacement have their non-ASCII characters converted to XML
    numeric character references (e.g. ``é`` becomes ``&#233;``), so that
    text stored in either form can be matched.

    Args:
        filename: path of the dictionary file.

    Returns:
        A dict mapping original text to replacement text, in file order
        (raw entry first, then its character-reference variant).

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
    """
    d = {}
    # The context manager guarantees the handle is closed, even on error.
    with open(filename, encoding="utf-8") as f:
        for line in f:
            fields = line.split()
            if not fields:
                # Blank or whitespace-only line: nothing to register.
                continue
            original, replacement = fields[0], fields[1]
            d[original] = replacement
            # Inserted after the raw entry on purpose: for a pure-ASCII key
            # both keys coincide, and the escaped replacement must win.
            escaped_original = original.encode('ascii', 'xmlcharrefreplace').decode()
            escaped_replacement = replacement.encode('ascii', 'xmlcharrefreplace').decode()
            d[escaped_original] = escaped_replacement
    print(d)
    return d
def main(args):
    """Apply the replacement dictionary to each input file and write the result.

    Every entry of ``args.filenames`` is expanded as a glob pattern. Each
    matching file is read as UTF-8, all dictionary replacements are applied
    in dictionary order, and the result is written to ``args.out_dir`` under
    the input file's base name.

    Args:
        args: parsed command-line namespace providing ``filenames`` (paths
            or glob patterns), ``out_dir`` (output directory, created if
            missing), ``dict`` (dictionary file path) and ``whole_word``
            (when true, only ``text="<token>"`` occurrences are replaced
            instead of every occurrence of the token).
    """
    os.makedirs(args.out_dir, exist_ok=True)
    replacement_dict = read_dictionary_from_file(args.dict)
    for pattern in args.filenames:
        # When the glob matches nothing, fall back to the literal pattern so
        # that a mistyped path fails loudly on open() instead of being skipped.
        for filename in sorted(glob(pattern)) or [pattern]:
            # Read the whole input BEFORE opening the output: the output path
            # can be the input file itself (e.g. default out_dir "." with an
            # input in the current directory), and opening it for writing
            # first would truncate it before its contents were read.
            with open(filename, encoding="utf-8") as infile:
                xml_string = infile.read()
            for k, v in replacement_dict.items():
                if args.whole_word:
                    xml_string = xml_string.replace("text=\"" + k + "\"", "text=\"" + v + "\"")
                else:
                    xml_string = xml_string.replace(k, v)
            out_path = os.path.join(args.out_dir, os.path.basename(filename))
            with open(out_path, "w", encoding="utf-8") as outfile:
                outfile.write(xml_string)
    print("Done")
if __name__ == "__main__":
    # Command-line entry point: build the argument parser and run main().
    argparser = argparse.ArgumentParser(description=desc)
    argparser.add_argument("filenames", nargs="+", help="files to replace tokens in")
    argparser.add_argument("-o", "--out-dir", default=".", help="output directory for changed XMLs")
    # Required: without a dictionary path main() would call open(None) and
    # fail with an opaque TypeError; argparse now reports a usage error.
    argparser.add_argument("-d", "--dict", required=True,
                           help="filename to read the dictionary from. the file should have one line per entry, in the"
                                " format of <original text> <replaced text>")
    argparser.add_argument("-w", "--whole-word", action="store_true", help="replace whole word")
    main(argparser.parse_args())