Source code for scripts.unique_roles

#!/usr/bin/env python3

import argparse
from collections import Counter

from ucca import layer1
from ucca.ioutil import get_passages_with_progress_bar

desc = """Finds edge tags that are empirically always unique: occur at most once in edges per node
"""


[docs]def main(args): out = args.direction == "out" roles = set(tag for name, tag in layer1.EdgeTags.__dict__.items() if isinstance(tag, str) and not name.startswith('__')) for passage in get_passages_with_progress_bar([args.directory]): for node in passage.layer(layer1.LAYER_ID).all: counts = Counter(edge.tag for edge in (node if out else node.incoming)) roles.difference_update(tag for tag, count in counts.items() if count > 1) lines = "\n".join(sorted(roles)) print(lines) if args.outfile: with open(args.outfile, "w", encoding="utf-8") as f: print(lines, file=f)
if __name__ == '__main__': argparser = argparse.ArgumentParser(description=desc) argparser.add_argument('-d', '--directory', required=True, help="directory with passage files to process") argparser.add_argument('-o', '--outfile', default="data/unique_roles.txt", help="output file for data") argparser.add_argument('-D', '--direction', default="out", help="direction of edges to check (out|in)") main(argparser.parse_args())