# Source code for ucca_db.upload

from argparse import ArgumentParser
from xml.etree.ElementTree import tostring

from ucca import convert
from ucca.ioutil import get_passages_with_progress_bar
from ucca_db.api import CONNECTION, write_to_db

# Program description; passed to ArgumentParser as the command-line help text.
desc = "Upload passages to old UCCA annotation app"


def upload_passage(xml_root, site_filename=None, verbose=False, **kwargs):
    """
    Serialize an XML element and hand the resulting text to ``write_to_db``.

    :param xml_root: ElementTree element, serialized with ``tostring`` and decoded to str
    :param site_filename: if truthy, path of a file to which the serialized XML
                          is also written (UTF-8, followed by a newline)
    :param verbose: if true and a file was written, print the file's path
    :param kwargs: forwarded unchanged to ``write_to_db``
    :return: the value returned by ``write_to_db``
    """
    serialized = tostring(xml_root).decode()
    if site_filename:
        # Optional on-disk copy of exactly what is about to be uploaded.
        with open(site_filename, "w", encoding="utf-8") as site_file:
            site_file.write(serialized + "\n")
        if verbose:
            print("Wrote '%s'" % site_filename)
    return write_to_db(xml=serialized, **kwargs)
def main(args):
    """
    Upload every requested passage and record the returned IDs in ``args.out``.

    Passage file names come from ``args.passages`` plus, when ``args.filenames``
    is set, the non-empty stripped lines of that file. Each passage is converted
    with ``convert.to_site`` and uploaded through ``upload_passage``; one line
    ``<passage ID> <upload result>`` is written to ``args.out`` per passage.

    :param args: parsed command-line arguments (see the argument parser at the
                 bottom of this module for the attributes that are read)
    """
    filenames = list(args.passages)
    if args.filenames:
        with open(args.filenames, encoding="utf-8") as names_file:
            # Strip surrounding whitespace and drop blank lines.
            stripped = (line.strip() for line in names_file)
            filenames.extend(name for name in stripped if name)

    with open(args.out, "w", encoding="utf-8") as out_file:
        for passage in get_passages_with_progress_bar(filenames):
            # Only build the debug file name when a site-format dump is requested.
            site_filename = None
            if args.write_site:
                site_filename = passage.ID + "_site_upload.xml"
            out = upload_passage(
                convert.to_site(passage),
                verbose=args.verbose,
                site_filename=site_filename,
                db_name=args.db_name,
                host_name=args.host_name,
                new_pid=passage.ID,
                new_prid=args.project_id,
                username=args.username,
            )
            print(passage.ID, out, file=out_file)
            if args.verbose:
                print("Uploaded passage %s with xid=%s" % (passage.ID, out))

    # NOTE(review): CONNECTION is bound by a from-import, so this name holds
    # whatever value ucca_db.api had at import time. If the api module rebinds
    # its global later, this check would not see it - confirm against ucca_db.api.
    if CONNECTION is not None:
        CONNECTION.commit()
    print("Wrote '%s'" % args.out)
if __name__ == "__main__":
    # Command-line entry point: declare the options, parse them, run main().
    argparser = ArgumentParser(description=desc)

    # Input passages: given directly and/or listed in a file.
    argparser.add_argument(
        "passages", nargs="*",
        help="the corpus, given as xml/pickle file names")
    argparser.add_argument(
        "-f", "--filenames",
        help="read input passages filenames from file rather than command line")

    # Database target and identity used for the upload.
    argparser.add_argument(
        "-d", "--db-name", default="work",
        help="database name")
    argparser.add_argument(
        "-H", "--host-name", default="pgserver",
        help="host name")
    argparser.add_argument(
        "-p", "--project-id", default="63",
        help="project ID")
    argparser.add_argument(
        "-u", "--username", default="danielh",
        help="username")

    # Output and diagnostics.
    argparser.add_argument(
        "-o", "--out", default="xids.txt",
        help="file to write created XML IDs to")
    argparser.add_argument(
        "--write-site", action="store_true",
        help="write site format for debugging before upload")
    argparser.add_argument(
        "-v", "--verbose", action="store_true",
        help="print tagged text for each passage")

    cli_args = argparser.parse_args()
    main(cli_args)