mirror of
				https://github.com/explosion/spaCy.git
				synced 2025-11-01 00:17:44 +03:00 
			
		
		
		
	* Draft out initial Spans data structure * Initial span group commit * Basic span group support on Doc * Basic test for span group * Compile span_group.pyx * Draft addition of SpanGroup to DocBin * Add deserialization for SpanGroup * Add tests for serializing SpanGroup * Fix serialization of SpanGroup * Add EdgeC and GraphC structs * Add draft Graph data structure * Compile graph * More work on Graph * Update GraphC * Upd graph * Fix walk functions * Let Graph take nodes and edges on construction * Fix walking and getting * Add graph tests * Fix import * Add module with the SpanGroups dict thingy * Update test * Rename 'span_groups' attribute * Try to fix c++11 compilation * Fix test * Update DocBin * Try to fix compilation * Try to fix graph * Improve SpanGroup docstrings * Add doc.spans to documentation * Fix serialization * Tidy up and add docs * Update docs [ci skip] * Add SpanGroup.has_overlap * WIP updated Graph API * Start testing new Graph API * Update Graph tests * Update Graph * Add docstring Co-authored-by: Ines Montani <ines@ines.io>
		
			
				
	
	
		
			710 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			Cython
		
	
	
	
	
	
			
		
		
	
	
			710 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			Cython
		
	
	
	
	
	
| # cython: infer_types=True, cdivision=True, boundscheck=False, binding=True
 | |
| from typing import List, Tuple, Generator
 | |
| from libc.stdint cimport int32_t, int64_t
 | |
| from libcpp.pair cimport pair
 | |
| from libcpp.unordered_map cimport unordered_map
 | |
| from libcpp.unordered_set cimport unordered_set
 | |
| from cython.operator cimport dereference
 | |
| cimport cython
 | |
| import weakref
 | |
| from preshed.maps cimport map_get_unless_missing
 | |
| from murmurhash.mrmr cimport hash64
 | |
| from ..typedefs cimport hash_t
 | |
| from ..strings import get_string_id
 | |
| from ..structs cimport EdgeC, GraphC
 | |
| from .token import Token
 | |
| 
 | |
| 
 | |
@cython.freelist(8)
cdef class Edge:
    """A reference to a single edge of an annotation graph.

    An Edge only stores the owning Graph and the integer index of the edge;
    every property reads through to the graph's C-level data on access.
    """
    cdef readonly Graph graph
    cdef readonly int i

    def __init__(self, Graph graph, int i):
        """Create a reference to edge number `i` of `graph`.

        graph (Graph): The graph that owns the edge.
        i (int): Index of the edge. It is stored as given and is not
            bounds-checked here.
        """
        self.graph = graph
        self.i = i

    @property
    def is_none(self) -> bool:
        """Whether this is the 'none' placeholder. Always False for Edge."""
        return False

    @property
    def doc(self) -> "Doc":
        """The Doc object that the owning graph refers to."""
        return self.graph.doc

    @property
    def head(self) -> "Node":
        """The node at the head end of the edge."""
        return Node(self.graph, self.graph.c.edges[self.i].head)

    @property
    def tail(self) -> "Node":
        """The node at the tail end of the edge."""
        return Node(self.graph, self.graph.c.edges[self.i].tail)

    @property
    def label(self) -> int:
        """The integer label stored on the edge."""
        return self.graph.c.edges[self.i].label

    @property
    def weight(self) -> float:
        """The weight stored for the edge."""
        return self.graph.c.weights[self.i]

    @property
    def label_(self) -> str:
        """The edge label, looked up in the doc's string store."""
        return self.doc.vocab.strings[self.label]
 | |
| 
 | |
| 
 | |
@cython.freelist(8)
cdef class Node:
    """A reference to a node of an annotation graph. Each node is made up of
    an ordered set of zero or more token indices.

    Node references are usually created by the Graph object itself, or from
    the Node or Edge objects. You usually won't need to instantiate this
    class yourself.
    """
    cdef readonly Graph graph
    cdef readonly int i

    def __init__(self, Graph graph, int i):
        """Create a reference to node number `i` of `graph`.

        graph (Graph): The graph that owns the node.
        i (int): Index of the node. Negative values count from the end.

        RAISES: IndexError if `i` is outside `-length <= i < length`.
        """
        cdef int length = graph.c.nodes.size()
        # Same valid range as a Python sequence: -length is the first item.
        if i >= length or i < -length:
            raise IndexError(f"Node index {i} out of bounds ({length})")
        if i < 0:
            i += length
        self.graph = graph
        self.i = i

    def __eq__(self, other):
        """Two nodes are equal if they are the same index of the same graph."""
        if not isinstance(other, Node):
            return NotImplemented
        if self.graph is not other.graph:
            return False
        return self.i == other.i

    def __iter__(self) -> Generator[int]:
        """Iterate over the token indices that make up the node."""
        for i in self.graph.c.nodes[self.i]:
            yield i

    def __getitem__(self, int i) -> int:
        """Get a token index from the node's set of tokens."""
        cdef int length = self.graph.c.nodes[self.i].size()
        if i >= length or i < -length:
            raise IndexError(f"Token index {i} out of bounds ({length})")
        if i < 0:
            i += length
        return self.graph.c.nodes[self.i][i]

    def __len__(self) -> int:
        """The number of tokens that make up the node."""
        return self.graph.c.nodes[self.i].size()

    @property
    def is_none(self) -> bool:
        """Whether the node is a special value, indicating 'none'.

        The NoneNode type is returned by the Graph, Edge and Node objects when
        there is no match to a query. It has the same API as Node, but it always
        returns NoneNode, NoneEdge or empty lists for its queries.
        """
        return False

    @property
    def doc(self) -> "Doc":
        """The Doc object that the graph refers to."""
        return self.graph.doc

    @property
    def tokens(self) -> Tuple[Token]:
        """A tuple of Token objects that make up the node."""
        doc = self.doc
        return tuple([doc[i] for i in self])

    def head(self, i=None, label=None) -> "Node":
        """Get the head of the first matching edge, searching by index, label,
        both or neither.

        For instance, `node.head(i=1)` will get the head of the second edge that
        this node is a tail of. `node.head(i=1, label="ARG0")` will further
        check that the second edge has the label `"ARG0"`.

        If no matching node can be found, the graph's NoneNode is returned.
        """
        # headed() yields an Edge (or NoneEdge); the node is its head end.
        return self.headed(i=i, label=label).head

    def tail(self, i=None, label=None) -> "Node":
        """Get the tail of the first matching edge, searching by index, label,
        both or neither.

        If no matching node can be found, the graph's NoneNode is returned.
        """
        return self.tailed(i=i, label=label).tail

    def sibling(self, i=None, label=None):
        """Get the first matching sibling node. Two nodes are siblings if they
        are both tails of the same head.

        If no matching node can be found, the graph's NoneNode is returned.
        """
        if i is None:
            siblings = self.siblings(label=label)
            return siblings[0] if siblings else NoneNode(self.graph)
        edges = self._sibling_edges()
        if i >= len(edges):
            return NoneNode(self.graph)
        if label is not None and edges[i].label != get_string_id(label):
            return NoneNode(self.graph)
        return edges[i].tail

    def heads(self, label=None) -> List["Node"]:
        """Find all matching heads of this node."""
        cdef vector[int] edge_indices
        self._find_edges(edge_indices, "head", label)
        return [Node(self.graph, self.graph.c.edges[i].head) for i in edge_indices]

    def tails(self, label=None) -> List["Node"]:
        """Find all matching tails of this node."""
        cdef vector[int] edge_indices
        self._find_edges(edge_indices, "tail", label)
        return [Node(self.graph, self.graph.c.edges[i].tail) for i in edge_indices]

    def siblings(self, label=None) -> List["Node"]:
        """Find all matching siblings of this node. Two nodes are siblings if
        they are tails of the same head.
        """
        edges = self._sibling_edges()
        if label is None:
            return [e.tail for e in edges]
        label_id = get_string_id(label)
        return [e.tail for e in edges if e.label == label_id]

    def _sibling_edges(self):
        """Collect the edges that run from any head of this node to a tail
        other than this node. Each distinct head is visited once.
        """
        edges = []
        seen_heads = set()
        for head in self.heads():
            if head.i in seen_heads:
                continue
            seen_heads.add(head.i)
            edges.extend(e for e in head.taileds() if e.tail.i != self.i)
        return edges

    def headed(self, i=None, label=None) -> Edge:
        """Find the first matching edge that leads to a head of this node
        (i.e. an edge whose tail is this node).

        If no matching edge can be found, the graph's NoneEdge is returned.
        """
        start, end = self._get_range(i, self.graph.c.n_heads[self.i])
        idx = self._find_edge("head", start, end, label)
        if idx == -1:
            return NoneEdge(self.graph)
        return Edge(self.graph, idx)

    def tailed(self, i=None, label=None) -> Edge:
        """Find the first matching edge that leads to a tail of this node
        (i.e. an edge whose head is this node).

        If no matching edge can be found, the graph's NoneEdge is returned.
        """
        start, end = self._get_range(i, self.graph.c.n_tails[self.i])
        idx = self._find_edge("tail", start, end, label)
        if idx == -1:
            return NoneEdge(self.graph)
        return Edge(self.graph, idx)

    def headeds(self, label=None) -> List[Edge]:
        """Find all matching edges that lead to a head of this node."""
        cdef vector[int] edge_indices
        self._find_edges(edge_indices, "head", label)
        return [Edge(self.graph, i) for i in edge_indices]

    def taileds(self, label=None) -> List["Edge"]:
        """Find all matching edges that lead to a tail of this node."""
        cdef vector[int] edge_indices
        self._find_edges(edge_indices, "tail", label)
        return [Edge(self.graph, i) for i in edge_indices]

    def walk_heads(self):
        """Yield the nodes reached by repeatedly following edges towards
        heads, starting from this node.
        """
        cdef vector[int] node_indices
        walk_head_nodes(node_indices, &self.graph.c, self.i)
        for i in node_indices:
            yield Node(self.graph, i)

    def walk_tails(self):
        """Yield the nodes reached by repeatedly following edges towards
        tails, starting from this node.
        """
        cdef vector[int] node_indices
        walk_tail_nodes(node_indices, &self.graph.c, self.i)
        for i in node_indices:
            yield Node(self.graph, i)

    cdef (int, int) _get_range(self, i, n):
        """Translate an optional edge position into a half-open search range.

        None means "search all n edges"; a valid position selects exactly that
        edge; anything else (too large or negative) gives an empty range.
        """
        if i is None:
            return (0, n)
        elif 0 <= i < n:
            return (i, i+1)
        else:
            return (0, 0)

    cdef int _find_edge(self, str direction, int start, int end, label) except -2:
        """Return the index of the first edge in positions [start, end) of this
        node's "head" or "tail" edges that matches `label` (any edge if `label`
        is None), or -1 if there is none.
        """
        cdef vector[int] edge_indices
        cdef hash_t label_id = 0
        cdef int n_edges
        cdef int j
        if direction == "head":
            get_head_edges(edge_indices, &self.graph.c, self.i)
        else:
            get_tail_edges(edge_indices, &self.graph.c, self.i)
        # Clamp the range so an empty or short edge list is never over-read.
        n_edges = edge_indices.size()
        if start < 0:
            start = 0
        if end > n_edges:
            end = n_edges
        if label is not None:
            # Accept both string labels and already-hashed integer labels.
            label_id = get_string_id(label)
        for j in range(start, end):
            if label is None or self.graph.c.edges[edge_indices[j]].label == label_id:
                return edge_indices[j]
        return -1

    cdef int _find_edges(self, vector[int]& edge_indices, str direction, label) except -1:
        """Append to `edge_indices` every "head" or "tail" edge of this node
        that matches `label` (all of them if `label` is None). Returns the
        resulting size of `edge_indices`.
        """
        cdef vector[int] unfiltered
        cdef hash_t label_id
        cdef int edge_index
        if label is None:
            if direction == "head":
                get_head_edges(edge_indices, &self.graph.c, self.i)
            else:
                get_tail_edges(edge_indices, &self.graph.c, self.i)
            return edge_indices.size()
        if direction == "head":
            get_head_edges(unfiltered, &self.graph.c, self.i)
        else:
            get_tail_edges(unfiltered, &self.graph.c, self.i)
        label_id = get_string_id(label)
        for edge_index in unfiltered:
            if self.graph.c.edges[edge_index].label == label_id:
                edge_indices.push_back(edge_index)
        return edge_indices.size()
 | |
| 
 | |
| 
 | |
cdef class NoneEdge(Edge):
    """An Edge subclass, representing a non-result. The NoneEdge has the same
    API as other Edge instances, but always returns NoneEdge, NoneNode, or empty
    lists.
    """
    def __init__(self, graph):
        """Create the placeholder for `graph`; its edge index is fixed at -1."""
        self.graph = graph
        self.i = -1

    @property
    def is_none(self) -> bool:
        """Always True: this object stands for 'no edge found'.

        Without this override the property is inherited from Edge and reports
        False, which makes callers such as Graph.has_edge treat a failed
        lookup as a hit.
        """
        return True

    @property
    def doc(self) -> "Doc":
        """The Doc object that the owning graph refers to."""
        return self.graph.doc

    @property
    def head(self) -> "NoneNode":
        """The NoneNode of the owning graph."""
        return NoneNode(self.graph)

    @property
    def tail(self) -> "NoneNode":
        """The NoneNode of the owning graph."""
        return NoneNode(self.graph)

    @property
    def label(self) -> int:
        """Always 0."""
        return 0

    @property
    def weight(self) -> float:
        """Always 0.0."""
        return 0.0

    @property
    def label_(self) -> str:
        """Always the empty string."""
        return ""
 | |
| 
 | |
| 
 | |
cdef class NoneNode(Node):
    """A Node subclass, representing a non-result. It has the same API as
    Node, but its queries only ever return itself, a NoneEdge or empty
    collections, and it never reads the graph's node data (its index is -1).
    """
    def __init__(self, graph):
        """Create the placeholder for `graph`; its node index is fixed at -1."""
        self.graph = graph
        self.i = -1

    def __iter__(self):
        # The inherited implementation would index the graph with -1.
        return iter(())

    def __getitem__(self, int i):
        raise IndexError("Cannot index into NoneNode.")

    def __len__(self):
        return 0

    @property
    def is_none(self):
        """Always True: this object stands for 'no node found'."""
        return True

    @property
    def doc(self):
        """The Doc object that the owning graph refers to."""
        return self.graph.doc

    @property
    def tokens(self):
        """Always an empty tuple."""
        return tuple()

    def head(self, i=None, label=None):
        return self

    def tail(self, i=None, label=None):
        return self

    def sibling(self, i=None, label=None):
        return self

    def heads(self, label=None):
        return []

    def tails(self, label=None):
        return []

    def siblings(self, label=None):
        return []

    def headed(self, i=None, label=None):
        return NoneEdge(self.graph)

    def tailed(self, i=None, label=None):
        return NoneEdge(self.graph)

    def headeds(self, label=None):
        return []

    def taileds(self, label=None):
        return []

    def walk_heads(self):
        yield from []

    def walk_tails(self):
        yield from []
 | |
|  
 | |
| 
 | |
cdef class Graph:
    """A set of directed labelled relationships between sets of tokens.

    EXAMPLE:
        Construction 1
        >>> graph = Graph(doc, name="srl")

        Construction 2
        >>> graph = Graph(
            doc,
            name="srl",
            nodes=[(0,), (1, 3), ()],
            edges=[(0, 2), (2, 1)]
        )

        Construction 3
        >>> graph = Graph(
            doc,
            name="srl",
            nodes=[(0,), (1, 3), ()],
            edges=[(2, 0), (0, 1)],
            labels=["word sense ID 1675", "agent"],
            weights=[-42.6, -1.7]
        )
        >>> assert graph.has_node((0,))
        >>> assert graph.has_edge((0,), (1,3), label="agent")
    """
    # NOTE(review): the attributes `c`, `name` and `doc_ref` used below are not
    # declared in this file; presumably they come from the matching .pxd.

    def __init__(self, doc, *, name="", nodes=[], edges=[], labels=None, weights=None):
        """Create a Graph object.

        doc (Doc): The Doc object the graph will refer to.
        name (str): A string name to help identify the graph. Defaults to "".
        nodes (List[Tuple[int]]): A list of token-index tuples to add to the graph
            as nodes. Defaults to [].
        edges (List[Tuple[int, int]]): A list of edges between the provided nodes.
            Each edge should be a (head, tail) tuple, where `head` and `tail`
            are integers pointing into the `nodes` list. Defaults to [].
        labels (Optional[List[str]]): A list of labels for the provided edges.
            If None, all of the edges specified by the edges argument will
            be labelled with the empty string (""). If `labels` is not `None`,
            it must have the same length as the `edges` argument.
        weights (Optional[List[float]]): A list of weights for the provided edges.
            If None, all of the edges specified by the edges argument will
            have the weight 0.0. If `weights` is not `None`, it must have the
            same length as the `edges` argument.
        """
        if weights is not None:
            assert len(weights) == len(edges), "weights and edges differ in length"
        else:
            weights = [0.0] * len(edges)
        if labels is not None:
            assert len(labels) == len(edges), "labels and edges differ in length"
        else:
            labels = [""] * len(edges)
        # Heap-allocated lookup tables; released again in __dealloc__.
        self.c.node_map = new unordered_map[hash_t, int]()
        self.c.edge_map = new unordered_map[hash_t, int]()
        self.c.roots = new unordered_set[int]()
        self.name = name
        # Only a weak reference is kept, so the graph does not keep the doc alive.
        self.doc_ref = weakref.ref(doc)
        for node in nodes:
            self.add_node(node)
        for (head, tail), label, weight in zip(edges, labels, weights):
            self.add_edge(
                Node(self, head),
                Node(self, tail),
                label=label,
                weight=weight
            )

    def __dealloc__(self):
        del self.c.node_map
        del self.c.edge_map
        del self.c.roots

    @property
    def doc(self) -> "Doc":
        """The Doc object the graph refers to."""
        return self.doc_ref()

    @property
    def edges(self) -> Generator[Edge]:
        """Iterate over the edges in the graph."""
        for i in range(self.c.edges.size()):
            yield Edge(self, i)

    @property
    def nodes(self) -> Generator[Node]:
        """Iterate over the nodes in the graph."""
        for i in range(self.c.nodes.size()):
            yield Node(self, i)

    def add_edge(self, head, tail, *, label="", weight=None) -> Edge:
        """Add an edge to the graph, connecting two groups of tokens.

        If there is already an edge for the (head, tail, label) triple, it will
        be returned, and no new edge will be created. The weight of the edge
        will be updated if a weight is specified; if `weight` is None an
        existing edge keeps its weight and a new edge gets the weight 0.0.
        """
        cdef EdgeC edge
        cdef int edge_index
        label_hash = self.doc.vocab.strings.as_int(label)
        edge = EdgeC(
            head=self.add_node(head).i,
            tail=self.add_node(tail).i,
            label=label_hash,
        )
        if weight is None:
            # Do not clobber the stored weight of an edge that already exists.
            edge_index = get_edge(&self.c, edge)
            if edge_index < 0:
                edge_index = add_edge(&self.c, edge, 0.0)
        else:
            edge_index = add_edge(&self.c, edge, weight)
        return Edge(self, edge_index)

    def get_edge(self, head, tail, *, label="") -> Edge:
        """Look up an edge in the graph. If the graph has no matching edge,
        the NoneEdge object is returned.
        """
        head_node = self.get_node(head)
        if head_node.is_none:
            return NoneEdge(self)
        tail_node = self.get_node(tail)
        if tail_node.is_none:
            return NoneEdge(self)
        edge_index = get_edge(
            &self.c,
            EdgeC(head=head_node.i, tail=tail_node.i, label=get_string_id(label))
        )
        if edge_index < 0:
            return NoneEdge(self)
        else:
            return Edge(self, edge_index)

    def has_edge(self, head, tail, label="") -> bool:
        """Check whether a (head, tail, label) triple is an edge in the graph.

        `label` defaults to the empty string, matching get_edge.
        """
        return not self.get_edge(head, tail, label=label).is_none

    def add_node(self, indices) -> Node:
        """Add a node to the graph and return it. Nodes refer to ordered sets
        of token indices.

        This method is idempotent: if there is already a node for the given
        indices, it is returned without a new node being created. A Node
        instance passed as `indices` is returned unchanged.
        """
        cdef vector[int32_t] node
        if isinstance(indices, Node):
            return indices
        node.reserve(len(indices))
        for idx in indices:
            node.push_back(idx)
        i = add_node(&self.c, node)
        return Node(self, i)

    def get_node(self, indices) -> Node:
        """Get a node from the graph, or the NoneNode if there is no node for
        the given indices. A Node instance passed as `indices` is returned
        unchanged.
        """
        cdef vector[int32_t] node
        if isinstance(indices, Node):
            return indices
        node.reserve(len(indices))
        for idx in indices:
            node.push_back(idx)
        node_index = get_node(&self.c, node)
        if node_index < 0:
            return NoneNode(self)
        else:
            return Node(self, node_index)

    def has_node(self, tuple indices) -> bool:
        """Check whether the graph has a node for the given indices."""
        return not self.get_node(indices).is_none
 | |
| 
 | |
| 
 | |
cdef int add_edge(GraphC* graph, EdgeC edge, float weight) nogil:
    """Insert `edge` into `graph` (or find it if already present), store
    `weight` for it, and return the edge's index.
    """
    # An edge is identified by the hash of its raw struct bytes.
    edge_key = hash64(&edge, sizeof(edge), 0)
    found = graph.edge_map.find(edge_key)
    if found != graph.edge_map.end():
        # Known edge: only the weight is refreshed.
        existing = dereference(found).second
        graph.weights[existing] = weight
        return existing
    new_index = graph.edges.size()
    graph.edge_map.insert(pair[hash_t, int](edge_key, new_index))
    graph.edges.push_back(edge)
    # Remember where each node's first outgoing / incoming edge lives so that
    # later scans can start there.
    if graph.n_tails[edge.head] == 0:
        graph.first_tail[edge.head] = new_index
    if graph.n_heads[edge.tail] == 0:
        graph.first_head[edge.tail] = new_index
    graph.n_tails[edge.head] += 1
    graph.n_heads[edge.tail] += 1
    graph.weights.push_back(weight)
    # A node that now has a head can no longer be a root.
    root_entry = graph.roots.find(edge.tail)
    if root_entry != graph.roots.end():
        graph.roots.erase(root_entry)
    return new_index
 | |
| 
 | |
| 
 | |
cdef int get_edge(const GraphC* graph, EdgeC edge) nogil:
    """Return the index of `edge` in `graph`, or -1 if it is not present."""
    edge_key = hash64(&edge, sizeof(edge), 0)
    found = graph.edge_map.find(edge_key)
    if found != graph.edge_map.end():
        return dereference(found).second
    return -1
 | |
| 
 | |
| 
 | |
cdef int has_edge(const GraphC* graph, EdgeC edge) nogil:
    """Return 1 if `edge` is present in `graph`, otherwise 0."""
    edge_index = get_edge(graph, edge)
    return edge_index >= 0
 | |
| 
 | |
| 
 | |
cdef int add_node(GraphC* graph, vector[int32_t]& node) nogil:
    """Insert `node` (a list of token indices) into `graph` if it is not
    already there, and return its index.
    """
    # data() is used rather than &node[0]: indexing element 0 of an empty
    # vector is undefined, and nodes with zero tokens are allowed.
    key = hash64(node.data(), node.size() * sizeof(int32_t), 0)
    it = graph.node_map.find(key)
    if it != graph.node_map.end():
        # Item found. Convert the iterator to an index value.
        return dereference(it).second
    else:
        index = graph.nodes.size()
        graph.nodes.push_back(node)
        # Per-node bookkeeping arrays grow in step with `nodes`.
        graph.n_heads.push_back(0)
        graph.n_tails.push_back(0)
        graph.first_head.push_back(0)
        graph.first_tail.push_back(0)
        # A fresh node has no head yet, so it starts out as a root.
        graph.roots.insert(index)
        graph.node_map.insert(pair[hash_t, int](key, index))
        return index
 | |
|  
 | |
| 
 | |
cdef int get_node(const GraphC* graph, vector[int32_t] node) nogil:
    """Return the index of `node` in `graph`, or -1 if it is not present."""
    # data() is used rather than &node[0]: indexing element 0 of an empty
    # vector is undefined, and nodes with zero tokens are allowed.
    key = hash64(node.data(), node.size() * sizeof(int32_t), 0)
    it = graph.node_map.find(key)
    if it == graph.node_map.end():
        return -1
    else:
        return dereference(it).second
 | |
| 
 | |
| 
 | |
cdef int has_node(const GraphC* graph, vector[int32_t] node) nogil:
    """Return 1 if `node` is present in `graph`, otherwise 0."""
    node_index = get_node(graph, node)
    return node_index >= 0
 | |
| 
 | |
| 
 | |
cdef int get_head_nodes(vector[int]& output, const GraphC* graph, int node) nogil:
    """Append the head node of every edge whose tail is `node` to `output`.

    Returns the number of expected heads that were not found (0 when all
    were collected).
    """
    cdef int remaining = graph.n_heads[node]
    cdef int edge_index
    cdef int n_edges
    if remaining == 0:
        return 0
    output.reserve(output.size() + remaining)
    # Scan forward from the first edge known to point at this node and stop
    # as soon as the recorded number of heads has been collected.
    edge_index = graph.first_head[node]
    n_edges = graph.edges.size()
    while edge_index < n_edges and remaining > 0:
        if graph.edges[edge_index].tail == node:
            output.push_back(graph.edges[edge_index].head)
            remaining -= 1
        edge_index += 1
    return remaining
 | |
| 
 | |
| 
 | |
cdef int get_tail_nodes(vector[int]& output, const GraphC* graph, int node) nogil:
    """Append the tail node of every edge whose head is `node` to `output`.

    Returns the number of expected tails that were not found (0 when all
    were collected).
    """
    cdef int remaining = graph.n_tails[node]
    cdef int edge_index
    cdef int n_edges
    if remaining == 0:
        return 0
    output.reserve(output.size() + remaining)
    # Scan forward from the first edge known to leave this node and stop as
    # soon as the recorded number of tails has been collected.
    edge_index = graph.first_tail[node]
    n_edges = graph.edges.size()
    while edge_index < n_edges and remaining > 0:
        if graph.edges[edge_index].head == node:
            output.push_back(graph.edges[edge_index].tail)
            remaining -= 1
        edge_index += 1
    return remaining
 | |
| 
 | |
| 
 | |
cdef int get_sibling_nodes(vector[int]& output, const GraphC* graph, int node) nogil:
    """Append to `output` every node, other than `node` itself, that is a
    tail of one of `node`'s heads. Returns the resulting size of `output`.
    """
    cdef vector[int] parent_nodes
    cdef vector[int] child_nodes
    get_head_nodes(parent_nodes, graph, node)
    for p in range(parent_nodes.size()):
        # Start each head with an empty scratch buffer.
        child_nodes.clear()
        get_tail_nodes(child_nodes, graph, parent_nodes[p])
        for c in range(child_nodes.size()):
            if child_nodes[c] == node:
                continue
            output.push_back(child_nodes[c])
    return output.size()
 | |
| 
 | |
| 
 | |
cdef int get_head_edges(vector[int]& output, const GraphC* graph, int node) nogil:
    """Append the index of every edge whose tail is `node` to `output`.

    Returns the number of expected edges that were not found (0 when all
    were collected).
    """
    cdef int remaining = graph.n_heads[node]
    cdef int edge_index
    cdef int n_edges
    if remaining == 0:
        return 0
    output.reserve(output.size() + remaining)
    # Scan forward from the first edge known to point at this node and stop
    # as soon as the recorded number of edges has been collected.
    edge_index = graph.first_head[node]
    n_edges = graph.edges.size()
    while edge_index < n_edges and remaining > 0:
        if graph.edges[edge_index].tail == node:
            output.push_back(edge_index)
            remaining -= 1
        edge_index += 1
    return remaining
 | |
| 
 | |
| 
 | |
cdef int get_tail_edges(vector[int]& output, const GraphC* graph, int node) nogil:
    """Append the index of every edge whose head is `node` to `output`.

    Returns the number of expected edges that were not found (0 when all
    were collected).
    """
    cdef int remaining = graph.n_tails[node]
    cdef int edge_index
    cdef int n_edges
    if remaining == 0:
        return 0
    output.reserve(output.size() + remaining)
    # Scan forward from the first edge known to leave this node and stop as
    # soon as the recorded number of edges has been collected.
    edge_index = graph.first_tail[node]
    n_edges = graph.edges.size()
    while edge_index < n_edges and remaining > 0:
        if graph.edges[edge_index].head == node:
            output.push_back(edge_index)
            remaining -= 1
        edge_index += 1
    return remaining
 | |
| 
 | |
| 
 | |
cdef int walk_head_nodes(vector[int]& output, const GraphC* graph, int node) nogil:
    """Collect into `output` the nodes reached from `node` by repeatedly
    following edges towards heads. Returns the number of entries processed.

    `output` doubles as the work queue: heads of each not-yet-expanded entry
    are appended to it while it is being scanned. A node can therefore appear
    more than once, but each node is expanded only once, so cycles terminate.
    """
    cdef unordered_set[int] seen = unordered_set[int]()
    get_head_nodes(output, graph, node)
    seen.insert(node)
    i = 0
    while i < output.size():
        if seen.find(output[i]) == seen.end():
            seen.insert(output[i])
            get_head_nodes(output, graph, output[i])
        i += 1
    return i
 | |
| 
 | |
| 
 | |
cdef int walk_tail_nodes(vector[int]& output, const GraphC* graph, int node) nogil:
    """Collect into `output` the nodes reached from `node` by repeatedly
    following edges towards tails. Returns the number of entries processed.

    `output` doubles as the work queue: tails of each not-yet-expanded entry
    are appended to it while it is being scanned.
    """
    cdef unordered_set[int] expanded = unordered_set[int]()
    cdef int current
    expanded.insert(node)
    get_tail_nodes(output, graph, node)
    cursor = 0
    while cursor < output.size():
        current = output[cursor]
        cursor += 1
        if expanded.find(current) != expanded.end():
            # Already expanded once; skip to avoid looping on cycles.
            continue
        expanded.insert(current)
        get_tail_nodes(output, graph, current)
    return cursor
 | |
| 
 | |
| 
 | |
cdef int walk_head_edges(vector[int]& output, const GraphC* graph, int node) nogil:
    """Collect into `output` the indices of the edges reached from `node` by
    repeatedly following edges towards heads. Returns the number of entries
    processed.

    `output` holds edge indices, while `seen` tracks node indices: each edge
    is followed to its head node, and that node's own head edges are appended
    the first time the node is encountered.
    """
    cdef unordered_set[int] seen = unordered_set[int]()
    cdef int head_node
    get_head_edges(output, graph, node)
    seen.insert(node)
    i = 0
    while i < output.size():
        # Translate the edge index into the node it leads to before expanding.
        head_node = graph.edges[output[i]].head
        if seen.find(head_node) == seen.end():
            seen.insert(head_node)
            get_head_edges(output, graph, head_node)
        i += 1
    return i
 | |
| 
 | |
| 
 | |
cdef int walk_tail_edges(vector[int]& output, const GraphC* graph, int node) nogil:
    """Collect into `output` the indices of the edges reached from `node` by
    repeatedly following edges towards tails. Returns the number of entries
    processed.

    `output` holds edge indices, while `seen` tracks node indices: each edge
    is followed to its tail node, and that node's own tail edges are appended
    the first time the node is encountered.
    """
    cdef unordered_set[int] seen = unordered_set[int]()
    cdef int tail_node
    get_tail_edges(output, graph, node)
    seen.insert(node)
    i = 0
    while i < output.size():
        # Translate the edge index into the node it leads to before expanding.
        tail_node = graph.edges[output[i]].tail
        if seen.find(tail_node) == seen.end():
            seen.insert(tail_node)
            get_tail_edges(output, graph, tail_node)
        i += 1
    return i
 |