Fix the noun_chunks overlap check for all languages

This commit is contained in:
svlandeg 2020-05-21 00:17:28 +02:00
parent b509a3e7fc
commit b221bcf1ba
7 changed files with 43 additions and 31 deletions

View File

@@ -31,16 +31,15 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree):
continue
flag = False flag = False
if word.pos == NOUN: if word.pos == NOUN:
# check for patterns such as γραμμή παραγωγής # check for patterns such as γραμμή παραγωγής
for potential_nmod in word.rights: for potential_nmod in word.rights:
if potential_nmod.dep == nmod: if potential_nmod.dep == nmod:
seen.update( w_range = range(word.left_edge.i, potential_nmod.i + 1)
j for j in range(word.left_edge.i, potential_nmod.i + 1) if any(j in seen for j in w_range):
) continue
seen.update(j for j in w_range)
yield word.left_edge.i, potential_nmod.i + 1, np_label yield word.left_edge.i, potential_nmod.i + 1, np_label
flag = True flag = True
break break
@@ -54,9 +53,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label

View File

@@ -36,9 +36,10 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(j in seen for j in range(word.left_edge.i, word.i + 1)): w_range = range(word.left_edge.i, word.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label
elif word.dep == conj: elif word.dep == conj:
head = word.head head = word.head
@@ -46,9 +47,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(j in seen for j in range(word.left_edge.i, word.i + 1)): w_range = range(word.left_edge.i, word.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label

View File

@@ -36,9 +36,10 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label
elif word.dep == conj: elif word.dep == conj:
head = word.head head = word.head
@@ -46,9 +47,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.i + 1, np_label yield word.left_edge.i, word.i + 1, np_label

View File

@@ -35,9 +35,10 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label
elif word.dep == conj: elif word.dep == conj:
head = word.head head = word.head
@@ -45,9 +46,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label

View File

@@ -35,9 +35,10 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label
elif word.dep == conj: elif word.dep == conj:
head = word.head head = word.head
@@ -45,9 +46,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label

View File

@@ -35,9 +35,10 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label
elif word.dep == conj: elif word.dep == conj:
head = word.head head = word.head
@@ -45,9 +46,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label

View File

@@ -36,9 +36,10 @@ def noun_chunks(obj):
if word.i in seen: if word.i in seen:
continue continue
if word.dep in np_deps: if word.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label
elif word.dep == conj: elif word.dep == conj:
head = word.head head = word.head
@@ -46,9 +47,10 @@ def noun_chunks(obj):
head = head.head head = head.head
# If the head is an NP, and we're coordinated to it, we're an NP # If the head is an NP, and we're coordinated to it, we're an NP
if head.dep in np_deps: if head.dep in np_deps:
if any(w.i in seen for w in word.subtree): w_range = range(word.left_edge.i, word.right_edge.i + 1)
if any(j in seen for j in w_range):
continue continue
seen.update(j for j in range(word.left_edge.i, word.right_edge.i + 1)) seen.update(j for j in w_range)
yield word.left_edge.i, word.right_edge.i + 1, np_label yield word.left_edge.i, word.right_edge.i + 1, np_label