Source code for ucca.diffutil
import sys
from ucca.ioutil import passage2file
def diff_passages(true_passage, pred_passage, write=False):
    """
    Debug method to print missing or mistaken attributes, nodes and edges.

    Compares, in order: the passage attributes, the attributes of every
    layer of ``true_passage`` against the layer with the same ID in
    ``pred_passage``, and finally the nodes that each passage reports as
    missing from the other (including their attributes and edges).

    :param true_passage: reference passage
    :param pred_passage: passage compared against the reference
    :param write: if True and at least one difference was found, both
                  passages are written with ``passage2file`` to
                  ``<true ID>.xml`` and ``<pred ID>_pred.xml``
    :return: a string consisting of a newline followed by one line per
             reported difference (a lone newline if nothing was reported)
    """
    lines = []
    if not true_passage._attrib.equals(pred_passage._attrib):
        lines.append("Passage attributes mismatch: %s, %s" %
                     (true_passage._attrib, pred_passage._attrib))

    for lid, true_layer in true_passage._layers.items():
        # Only the lookup is guarded, so a KeyError raised elsewhere is not
        # misreported as a missing layer.
        try:
            # Look the layer up in the *predicted* passage; fetching it from
            # true_passage would compare each layer with itself.
            pred_layer = pred_passage.layer(lid)
        except KeyError:  # no layer with same ID found
            lines.append("Missing layer: %s, %s" %
                         (true_passage._layers, pred_passage._layers))
            break  # the message already lists all layers of both passages
        if not true_layer._attrib.equals(pred_layer._attrib):
            # %s rather than %d: identical output for integer IDs, and no
            # TypeError when layer IDs are not numbers.
            lines.append("Layer %s attributes mismatch: %s, %s" %
                         (lid, true_layer._attrib, pred_layer._attrib))

    # Predicted nodes are keyed by their "remarks" extra when present,
    # falling back to their own ID.
    # NOTE(review): presumably "remarks" holds the ID of the corresponding
    # reference node -- confirm against the code that sets it.
    pred_ids = {node.extra.get("remarks", node.ID): node
                for node in pred_passage.missing_nodes(true_passage)}
    true_ids = {node.ID: node
                for node in true_passage.missing_nodes(pred_passage)}

    # Iterate over a snapshot because matched entries are popped below.
    for pred_id, pred_node in list(pred_ids.items()):
        true_node = true_ids.get(pred_id)
        if true_node is None:  # explicit None test: do not rely on node truthiness
            continue
        # Present on both sides under the same key: report the details of
        # the difference instead of listing the node as missing/mistaken.
        pred_ids.pop(pred_id)
        true_ids.pop(pred_id)
        pred_edges = {edge.tag + "->" + edge.child.ID: edge for edge in
                      pred_node.missing_edges(true_node)}
        true_edges = {edge.tag + "->" + edge.child.ID: edge for edge in
                      true_node.missing_edges(pred_node)}
        # Edges with the same "tag->child ID" key on both sides are dropped.
        intersection = set(pred_edges).intersection(true_edges)
        pred_edges = {s: edge for s, edge in pred_edges.items()
                      if s not in intersection}
        true_edges = {s: edge for s, edge in true_edges.items()
                      if s not in intersection}

        node_lines = []
        if not pred_node._attrib.equals(true_node._attrib):
            node_lines.append(" Attributes mismatch: %s, %s" %
                              (sorted(true_node._attrib.items()),
                               sorted(pred_node._attrib.items())))
        if pred_edges:
            node_lines.append(" Mistake edges: %s" % ", ".join(pred_edges))
        if true_edges:
            node_lines.append(" Missing edges: %s" % ", ".join(true_edges))
        if node_lines:
            lines.append("For node " + pred_id + ":")
            lines.extend(node_lines)

    # Whatever is left could not be paired with a node on the other side.
    if pred_ids:
        lines.append("Mistake nodes: %s" % ", ".join(pred_ids))
    if true_ids:
        lines.append("Missing nodes: %s" % ", ".join(true_ids))

    if write and lines:
        outfile = "%s.xml" % true_passage.ID
        sys.stderr.write("Writing passage '%s'...\n" % outfile)
        passage2file(true_passage, outfile)
        outfile = "%s_pred.xml" % pred_passage.ID
        sys.stderr.write("Writing passage '%s'...\n" % outfile)
        passage2file(pred_passage, outfile)

    return "\n" + "\n".join(lines)