Source code for scripts.evaluate_standard

#!/usr/bin/env python3
"""The evaluation script for UCCA layer 1."""
from argparse import ArgumentParser
from itertools import repeat

from ucca import evaluation, constructions, ioutil


[docs]def main(args):
    guessed, ref, ref_yield_tags = [None if x is None else ioutil.read_files_and_dirs((x,))
                                    for x in (args.guessed, args.ref, args.ref_yield_tags)]
    if args.match_by_id:
        guessed = match_by_id(guessed, ref)
        ref_yield_tags = match_by_id(ref_yield_tags, ref)
    results = []
    eval_type = evaluation.UNLABELED if args.unlabeled else evaluation.LABELED
    verbose = args.verbose or len(guessed) == 1
    for g, r, ryt in zip(guessed, ref, ref_yield_tags or repeat(None)):
        if len(guessed) > 1:
            print("Evaluating %s%s" % (g.ID, ":" if args.verbose else "..."), end="\r", flush=True)
        if args.verbose:
            print()
        result = evaluation.evaluate(g, r, constructions=args.constructions, units=args.units, fscore=args.fscore,
                                     errors=args.errors, verbose=verbose,
                                     normalize=args.normalize, ref_yield_tags=ryt,
                                     eval_type=evaluation.UNLABELED if args.unlabeled else None)
        if verbose:
            if args.errors:
                result.print_confusion_matrix(as_table=args.as_table)
            if not args.quiet:
                print_f1(result, eval_type)
        results.append(result)
    summarize(args, results, eval_type=eval_type)


[docs]def match_by_id(guessed, ref):
    if guessed is None:
        return None
    if len(guessed) != len(ref):
        raise ValueError("Number of passages to compare does not match: %d != %d" % (len(guessed), len(ref)))
    if len(guessed) > 1:
        guessed_by_id = {}
        for g in guessed:
            print("Reading %s..." % g.ID, end="\r", flush=True)
            guessed_by_id[g.ID] = g
        ids = [p.ID for p in ref]
        try:
            return [guessed_by_id[i] for i in ids]
        except KeyError as e:
            raise ValueError("Passage IDs do not match") from e
    return guessed


[docs]def print_f1(result, eval_type):
    print("Average %s F1 score: %.3f" % (eval_type, result.average_f1(eval_type)))


[docs]def summarize(args, results, eval_type):
    summary = evaluation.Scores.aggregate(results)
    if len(results) > 1:
        if args.verbose:
            print("Aggregated scores:")
        else:
            if not args.quiet:
                if args.fscore:
                    summary.print()
                elif args.errors:
                    print(args.guessed, args.ref)
                    summary.print_confusion_matrix(as_table=args.as_table)
        if not args.quiet:
            print_f1(summary, eval_type=eval_type)
    if args.out_file:
        with open(args.out_file, "w", encoding="utf-8") as f:
            print(*summary.titles(eval_type=eval_type), sep=",", file=f)
            for result in results:
                print(*result.fields(eval_type=eval_type), sep=",", file=f)
        print("Wrote '%s'" % args.out_file)
    for filename, counts in ((args.summary_file, False), (args.counts_file, True)):
        if filename:
            with open(filename, "w", encoding="utf-8") as f:
                print(*summary.titles(eval_type=eval_type, counts=counts), sep=",", file=f)
                print(*summary.fields(eval_type=eval_type, counts=counts), sep=",", file=f)
            print("Wrote '%s'" % filename)
    if args.errors_file:
        with open(args.errors_file, "w", encoding="utf-8") as f:
            summary.print_confusion_matrix(sep=",", as_table=args.as_table, file=f)
        print("Wrote '%s'" % args.errors_file)


[docs]def check_args(args):
    if args.out_file or args.summary_file or not (args.units or args.fscore or args.errors):
        args.fscore = True
    if args.errors_file:
        args.errors = True
    return args


if __name__ == "__main__":
    argparser = ArgumentParser(description="Compare two UCCA passages or two directories containing passage files.")
    argparser.add_argument("guessed", help="xml/pickle file name for the guessed annotation, or directory of files")
    argparser.add_argument("ref", help="xml/pickle file name for the reference annotation, or directory of files")
    argparser.add_argument("-r", "--ref-yield-tags", help="xml/pickle file name for reference used for extracting edge "
                                                          "categories for fine-grained annotation "
                                                          "(--constructions categories), or directory of files")
    argparser.add_argument("-u", "--units", action="store_true",
                           help="the units the annotations have in common, and those each has separately")
    argparser.add_argument("-f", "--fscore", action="store_true",
                           help="outputs the traditional P,R,F instead of the scene structure evaluation")
    argparser.add_argument("-e", "--errors", action="store_true",
                           help="prints the error distribution according to its frequency")
    argparser.add_argument("-N", "--no-normalize", dest="normalize", action="store_false",
                           help="do not normalize passages before evaluation")
    argparser.add_argument("-I", "--no-match-by-id", dest="match_by_id", action="store_false",
                           help="do not match passages by ID, instead matching by order")
    argparser.add_argument("--as-table", action="store_true", help="if given --errors, print confusion matrix as table")
    argparser.add_argument("--unlabeled", action="store_true", help="only unlabeled evaluation")
    argparser.add_argument("--out-file", help="file to write results for each evaluated passage to, in CSV format")
    argparser.add_argument("--summary-file", help="file to write aggregated scores to, in CSV format")
    argparser.add_argument("--counts-file", help="file to write aggregated counts to, in CSV format")
    argparser.add_argument("--errors-file", help="file to write aggregated confusion matrix to, in CSV format")
    group = argparser.add_mutually_exclusive_group()
    group.add_argument("-v", "--verbose", action="store_true",
                       help="prints the results for every single pair (always true if there is only one pair)")
    group.add_argument("-q", "--quiet", action="store_true", help="do not print anything")
    constructions.add_argument(argparser)
    main(check_args(argparser.parse_args()))
Source code for scripts.evaluate_standard

UCCA

Navigation

Related Topics