-
Notifications
You must be signed in to change notification settings - Fork 4
Open
Description
I'm testing the conversion from CoNLL-U to FoLiA, and then from FoLiA to annotated text, using the following CoNLL-U file called traindata.conllu:
# newdoc id = doc1
# newpar
# sent_id = 1
# text = Ik ben de weg kwijt, kunt u me zeggen waar de Lange Wapper ligt?
1 Ik ik PRON Pron|per|1|ev|nom Case=Nom|Number=Sing|Person=1|PronType=Prs 5 nsubj _ _
2 ben ben AUX V|hulpofkopp|ott|1|ev Aspect=Imp|Mood=Ind|Number=Sing|Person=1|Tense=Pres|VerbForm=Fin 5 cop _ _
3 de de DET Art|bep|zijdofmv|neut Definite=Def|PronType=Art 4 det _ _
4 weg weg NOUN N|soort|ev|neut Number=Sing 5 obj _ _
5 kwijt kwijt ADJ Adj|attr|stell|onverv Degree=Pos 0 root _ SpaceAfter=No
6 , , PUNCT Punc|komma PunctType=Comm 5 punct _ _
7 kunt kan VERB V|hulp|ott|2|ev Aspect=Imp|Mood=Ind|Number=Sing|Person=2|Tense=Pres|VerbForm=Fin|VerbType=Mod 5 parataxis _ _
8 u u PRON Pron|per|2|ev|nom Case=Nom|Number=Sing|Person=2|PronType=Prs 7 nsubj _ _
9 me me PRON Pron|per|1|ev|datofacc Case=Acc,Dat|Number=Sing|Person=1|PronType=Prs 10 obj _ _
10 zeggen zeg VERB V|trans|inf Subcat=Tran|VerbForm=Inf 7 xcomp _ _
11 waar waar ADV Adv|gew|vrag PronType=Int 15 mark _ _
12 de de DET Art|bep|zijdofmv|neut Definite=Def|PronType=Art 13 det _ _
13 Lange Lange PROPN N_N|eigen|ev|neut_eigen|ev|neut _ 15 nsubj _ _
14 Wapper Wapper PROPN PROPN _ 13 flat _ _
15 ligt lig VERB V|intrans|ott|3|ev Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Intr|Tense=Pres|VerbForm=Fin 10 acl _ SpaceAfter=No
16 ? ? PUNCT Punc|vraag PunctType=Qest 5 punct _ _
# sent_id = 2
# text = Jazeker meneer
1 Jazeker zeker ADJ Adj|attr|stell|onverv Degree=Pos 2 amod _ _
2 meneer meneer NOUN N|soort|ev|neut Number=Sing 0 root _ SpacesAfter=\n
# newdoc id = doc2
# newpar
# sent_id = 1
# text = Het gaat vooruit, het gaat verbazend goed vooruit
1 Het het PRON Pron|onbep|neut|zelfst PronType=Ind 2 nsubj _ _
2 gaat ga VERB V|intrans|ott|3|ev Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Intr|Tense=Pres|VerbForm=Fin 0 root _ _
3 vooruit vooruit ADV Adv|gew|geenfunc|stell|onverv Degree=Pos 2 advmod _ SpaceAfter=No
4 , , PUNCT Punc|komma PunctType=Comm 2 punct _ _
5 het het PRON Pron|onbep|neut|zelfst PronType=Ind 6 nsubj _ _
6 gaat ga VERB V|intrans|ott|3|ev Aspect=Imp|Mood=Ind|Number=Sing|Person=3|Subcat=Intr|Tense=Pres|VerbForm=Fin 2 parataxis _ _
7 verbazend verbazend VERB V|intrans|tegdw|onverv Subcat=Intr|Tense=Pres|VerbForm=Part 6 advcl _ _
8 goed goed ADJ Adj|adv|stell|onverv Degree=Pos|Variant=Short 6 obl _ _
9 vooruit vooruit ADV Adv|gew|geenfunc|stell|onverv Degree=Pos 6 compound:prt _ SpacesAfter=\n
Jan@bnosac MINGW64 ~/Dropbox/Work/RForgeBNOSAC/BNOSAC/udpipe/inst/dummydata (master)
$ conllu2folia traindata.conllu
Wrote doc1.folia.xml
Wrote doc2.folia.xml
Jan@bnosac MINGW64 ~/Dropbox/Work/RForgeBNOSAC/BNOSAC/udpipe/inst/dummydata (master)
$ folia2annotatedtxt -c text,pos,lemma doc1.folia.xml > test.tmp
Processing doc1.folia.xml
Traceback (most recent call last):
File "c:\python39\lib\runpy.py", line 197, in _run_module_as_main
return _run_code(code, main_globals, None,
File "c:\python39\lib\runpy.py", line 87, in _run_code
exec(code, run_globals)
File "C:\Python39\Scripts\folia2annotatedtxt.exe\__main__.py", line 7, in <module>
File "c:\python39\lib\site-packages\foliatools\folia2annotatedtxt.py", line 117, in main
process(x, outputfile)
File "c:\python39\lib\site-packages\foliatools\folia2annotatedtxt.py", line 174, in process
if w.paragraph() != prevpar and i > 0:
File "c:\python39\lib\site-packages\folia\main.py", line 3844, in paragraph
return self.ancestor(Paragraph)
File "c:\python39\lib\site-packages\folia\main.py", line 2528, in ancestor
raise NoSuchAnnotation
folia.main.NoSuchAnnotation
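The file causing the failure (doc1.folia.xml) looks like this. Note that its sentences sit directly under <text> with no <p> (paragraph) element, even though the input contains a "# newpar" marker, which is consistent with w.paragraph() raising NoSuchAnnotation: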
<?xml version='1.0' encoding='utf-8'?>
<FoLiA xmlns="http://ilk.uvt.nl/folia" xmlns:xlink="http://www.w3.org/1999/xlink" xml:id="doc1" version="2.5.1" generator="foliapy-v2.5.6">
<metadata type="native">
<annotations>
<text-annotation set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/text.foliaset.ttl">
<annotator processor="proc.conllu2folia.5e385a4e"/>
</text-annotation>
<sentence-annotation>
<annotator processor="proc.conllu2folia.5e385a4e"/>
</sentence-annotation>
<token-annotation>
<annotator processor="proc.conllu2folia.5e385a4e"/>
</token-annotation>
<pos-annotation set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl">
<annotator processor="proc.conllu2folia.5e385a4e"/>
</pos-annotation>
<pos-annotation set="undefined">
<annotator processor="proc.conllu2folia.5e385a4e"/>
</pos-annotation>
<lemma-annotation set="undefined">
<annotator processor="proc.conllu2folia.5e385a4e"/>
</lemma-annotation>
<dependency-annotation set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-dependencies.foliaset.ttl">
<annotator processor="proc.conllu2folia.5e385a4e"/>
</dependency-annotation>
</annotations>
<provenance>
<processor xml:id="proc.conllu2folia.5e385a4e" name="conllu2folia" type="auto" version="2.5.2" folia_version="2.5.1" command="conllu2folia traindata.conllu" host="bnosac" begindatetime="2021-08-25T17:34:37">
<processor xml:id="proc.conllu2folia.5e385a4e.generator" name="foliapy" type="generator" version="2.5.6" folia_version="2.5.1" src="https://github.com/proycon/foliapy"/>
</processor>
</provenance>
</metadata>
<text xml:id="doc1.text">
<s xml:id="doc1.s.1">
<t class="original">Ik ben de weg kwijt, kunt u me zeggen waar de Lange Wapper ligt?</t>
<w xml:id="doc1.s.1.w.1">
<t>Ik</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PRON">
<feat subset="Case" class="Nom"/>
<feat subset="Number" class="Sing"/>
<feat subset="Person" class="1"/>
<feat subset="PronType" class="Prs"/>
</pos>
<pos set="undefined" class="Pron|per|1|ev|nom"/>
<lemma class="ik"/>
</w>
<w xml:id="doc1.s.1.w.2">
<t>ben</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="AUX">
<feat subset="Aspect" class="Imp"/>
<feat subset="Mood" class="Ind"/>
<feat subset="Number" class="Sing"/>
<feat subset="Person" class="1"/>
<feat subset="Tense" class="Pres"/>
<feat subset="VerbForm" class="Fin"/>
</pos>
<pos set="undefined" class="V|hulpofkopp|ott|1|ev"/>
<lemma class="ben"/>
</w>
<w xml:id="doc1.s.1.w.3">
<t>de</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="DET">
<feat subset="Definite" class="Def"/>
<feat subset="PronType" class="Art"/>
</pos>
<pos set="undefined" class="Art|bep|zijdofmv|neut"/>
<lemma class="de"/>
</w>
<w xml:id="doc1.s.1.w.4">
<t>weg</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="NOUN">
<feat subset="Number" class="Sing"/>
</pos>
<pos set="undefined" class="N|soort|ev|neut"/>
<lemma class="weg"/>
</w>
<w xml:id="doc1.s.1.w.5" space="no">
<t>kwijt</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="ADJ">
<feat subset="Degree" class="Pos"/>
</pos>
<pos set="undefined" class="Adj|attr|stell|onverv"/>
<lemma class="kwijt"/>
</w>
<w xml:id="doc1.s.1.w.6">
<t>,</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PUNCT">
<feat subset="PunctType" class="Comm"/>
</pos>
<pos set="undefined" class="Punc|komma"/>
<lemma class=","/>
</w>
<w xml:id="doc1.s.1.w.7">
<t>kunt</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="VERB">
<feat subset="Aspect" class="Imp"/>
<feat subset="Mood" class="Ind"/>
<feat subset="Number" class="Sing"/>
<feat subset="Person" class="2"/>
<feat subset="Tense" class="Pres"/>
<feat subset="VerbForm" class="Fin"/>
<feat subset="VerbType" class="Mod"/>
</pos>
<pos set="undefined" class="V|hulp|ott|2|ev"/>
<lemma class="kan"/>
</w>
<w xml:id="doc1.s.1.w.8">
<t>u</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PRON">
<feat subset="Case" class="Nom"/>
<feat subset="Number" class="Sing"/>
<feat subset="Person" class="2"/>
<feat subset="PronType" class="Prs"/>
</pos>
<pos set="undefined" class="Pron|per|2|ev|nom"/>
<lemma class="u"/>
</w>
<w xml:id="doc1.s.1.w.9">
<t>me</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PRON">
<feat subset="Case" class="Acc,Dat"/>
<feat subset="Number" class="Sing"/>
<feat subset="Person" class="1"/>
<feat subset="PronType" class="Prs"/>
</pos>
<pos set="undefined" class="Pron|per|1|ev|datofacc"/>
<lemma class="me"/>
</w>
<w xml:id="doc1.s.1.w.10">
<t>zeggen</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="VERB">
<feat subset="Subcat" class="Tran"/>
<feat subset="VerbForm" class="Inf"/>
</pos>
<pos set="undefined" class="V|trans|inf"/>
<lemma class="zeg"/>
</w>
<w xml:id="doc1.s.1.w.11">
<t>waar</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="ADV">
<feat subset="PronType" class="Int"/>
</pos>
<pos set="undefined" class="Adv|gew|vrag"/>
<lemma class="waar"/>
</w>
<w xml:id="doc1.s.1.w.12">
<t>de</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="DET">
<feat subset="Definite" class="Def"/>
<feat subset="PronType" class="Art"/>
</pos>
<pos set="undefined" class="Art|bep|zijdofmv|neut"/>
<lemma class="de"/>
</w>
<w xml:id="doc1.s.1.w.13">
<t>Lange</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PROPN"/>
<pos set="undefined" class="N_N|eigen|ev|neut_eigen|ev|neut"/>
<lemma class="Lange"/>
</w>
<w xml:id="doc1.s.1.w.14">
<t>Wapper</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PROPN"/>
<pos set="undefined" class="PROPN"/>
<lemma class="Wapper"/>
</w>
<w xml:id="doc1.s.1.w.15" space="no">
<t>ligt</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="VERB">
<feat subset="Aspect" class="Imp"/>
<feat subset="Mood" class="Ind"/>
<feat subset="Number" class="Sing"/>
<feat subset="Person" class="3"/>
<feat subset="Subcat" class="Intr"/>
<feat subset="Tense" class="Pres"/>
<feat subset="VerbForm" class="Fin"/>
</pos>
<pos set="undefined" class="V|intrans|ott|3|ev"/>
<lemma class="lig"/>
</w>
<w xml:id="doc1.s.1.w.16">
<t>?</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="PUNCT">
<feat subset="PunctType" class="Qest"/>
</pos>
<pos set="undefined" class="Punc|vraag"/>
<lemma class="?"/>
</w>
<dependencies>
<dependency class="nsubj">
<dep>
<wref id="doc1.s.1.w.1" t="Ik"/>
</dep>
<hd>
<wref id="doc1.s.1.w.5" t="kwijt"/>
</hd>
</dependency>
<dependency class="cop">
<dep>
<wref id="doc1.s.1.w.2" t="ben"/>
</dep>
<hd>
<wref id="doc1.s.1.w.5" t="kwijt"/>
</hd>
</dependency>
<dependency class="det">
<dep>
<wref id="doc1.s.1.w.3" t="de"/>
</dep>
<hd>
<wref id="doc1.s.1.w.4" t="weg"/>
</hd>
</dependency>
<dependency class="obj">
<dep>
<wref id="doc1.s.1.w.4" t="weg"/>
</dep>
<hd>
<wref id="doc1.s.1.w.5" t="kwijt"/>
</hd>
</dependency>
<dependency class="punct">
<hd>
<wref id="doc1.s.1.w.5" t="kwijt"/>
</hd>
<dep>
<wref id="doc1.s.1.w.6" t=","/>
</dep>
</dependency>
<dependency class="parataxis">
<hd>
<wref id="doc1.s.1.w.5" t="kwijt"/>
</hd>
<dep>
<wref id="doc1.s.1.w.7" t="kunt"/>
</dep>
</dependency>
<dependency class="nsubj">
<hd>
<wref id="doc1.s.1.w.7" t="kunt"/>
</hd>
<dep>
<wref id="doc1.s.1.w.8" t="u"/>
</dep>
</dependency>
<dependency class="obj">
<dep>
<wref id="doc1.s.1.w.9" t="me"/>
</dep>
<hd>
<wref id="doc1.s.1.w.10" t="zeggen"/>
</hd>
</dependency>
<dependency class="xcomp">
<hd>
<wref id="doc1.s.1.w.7" t="kunt"/>
</hd>
<dep>
<wref id="doc1.s.1.w.10" t="zeggen"/>
</dep>
</dependency>
<dependency class="mark">
<dep>
<wref id="doc1.s.1.w.11" t="waar"/>
</dep>
<hd>
<wref id="doc1.s.1.w.15" t="ligt"/>
</hd>
</dependency>
<dependency class="det">
<dep>
<wref id="doc1.s.1.w.12" t="de"/>
</dep>
<hd>
<wref id="doc1.s.1.w.13" t="Lange"/>
</hd>
</dependency>
<dependency class="nsubj">
<dep>
<wref id="doc1.s.1.w.13" t="Lange"/>
</dep>
<hd>
<wref id="doc1.s.1.w.15" t="ligt"/>
</hd>
</dependency>
<dependency class="flat">
<hd>
<wref id="doc1.s.1.w.13" t="Lange"/>
</hd>
<dep>
<wref id="doc1.s.1.w.14" t="Wapper"/>
</dep>
</dependency>
<dependency class="acl">
<hd>
<wref id="doc1.s.1.w.10" t="zeggen"/>
</hd>
<dep>
<wref id="doc1.s.1.w.15" t="ligt"/>
</dep>
</dependency>
<dependency class="punct">
<hd>
<wref id="doc1.s.1.w.5" t="kwijt"/>
</hd>
<dep>
<wref id="doc1.s.1.w.16" t="?"/>
</dep>
</dependency>
</dependencies>
</s>
<s xml:id="doc1.s.2">
<t class="original">Jazeker meneer</t>
<w xml:id="doc1.s.2.w.1">
<t>Jazeker</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="ADJ">
<feat subset="Degree" class="Pos"/>
</pos>
<pos set="undefined" class="Adj|attr|stell|onverv"/>
<lemma class="zeker"/>
</w>
<w xml:id="doc1.s.2.w.2">
<t>meneer</t>
<pos set="https://raw.githubusercontent.com/proycon/folia/master/setdefinitions/universal-pos.foliaset.ttl" class="NOUN">
<feat subset="Number" class="Sing"/>
</pos>
<pos set="undefined" class="N|soort|ev|neut"/>
<lemma class="meneer"/>
</w>
<dependencies>
<dependency class="amod">
<dep>
<wref id="doc1.s.2.w.1" t="Jazeker"/>
</dep>
<hd>
<wref id="doc1.s.2.w.2" t="meneer"/>
</hd>
</dependency>
</dependencies>
</s>
</text>
</FoLiA>
Metadata
Metadata
Assignees
Labels
No labels