![]() |
omorfi 0.9.9
Open morphology of Finnish
|
Public Member Functions | |
| def | __init__ (self) |
| def | __str__ (self) |
| def | get_upos (self) |
| def | get_lemmas (self) |
| def | get_ufeats (self) |
| def | get_ftb_feats (self) |
| def | get_unimorph_feats (self) |
| def | get_vislcg_feats (self) |
| def | get_segments (self, split_morphs=True, split_words=True, split_new_words=True, split_derivs=False, split_nonwords=False) |
| def | get_moses_factor_segments (self) |
| def | get_ud_misc (self) |
| segleft = '' segright = '' if seglen == 0: segleft = '' segright = '' elif seglen == 1: segleft = options.segment_marker segright = options.segment_marker elif seglen % 2 == 0: segleft = options.segment_marker[:int(seglen / 2)] segright = options.segment_marker[int(seglen / 2):] else: segleft = options.segment_marker[:int((seglen - 1) / 2)] segright = options.segment_marker[int((seglen - 1) / 2):] moses = re.sub(r"\|", segleft + "|", moses) moses = re.sub(r" ", " " + segright, moses) last = moses.rfind(segleft + "|") moses = moses[:last + len(segleft) - 1] + moses[last + len(segleft):] More... | |
| def | printable_ud_misc (self) |
| def | printable_udepname (self) |
| def | printable_udephead (self) |
| def | printable_ud_feats (self, hacks=None) |
| def | printable_unimorph (self) |
| def | printable_ftb_feats (self) |
| def | get_xpos_ftb (self) |
| def | get_xpos_tdt (self) |
| def | printable_vislcg (self) |
| def | is_oov (self) |
Static Public Member Functions | |
| def | fromstr (str s) |
| def | fromomor (str s, weight=float("inf"), hacks=None) |
| def | fromvislcg (s) |
Data Fields | |
| raw | |
| upos | |
| ufeats | |
| udepname | |
| udeppos | |
| misc | |
| rawtype | |
| weight | |
| analsurf | |
| manglers | |
| lemmas | |
Contains a single analysis of a token. An analysis is a hypothesis of what some of the token's features may be: a morphological analysis contains morphosyntactic readings, and a segmentation contains segment markers.
| def omorfi.analysis.Analysis.__init__ | ( | self | ) |
Create an empty analysis @param raw analysis in string form @param weight penalty weight for analysis
|
static |
Constructs analysis from Omor style string.
Typically used to create an analysis from libhfst string and weight
after using omorfi HFST analyser on a surface string.
Args:
s An omor-style analysis string, e.g.
"[WORD_ID=äh][UPOS=INTJ]"
weight A penalty-weight of the analysis
hacks Used for mangling some values based on some standards and
treebanks
Returns:
a token with omor analysis parsed into structured information
|
static |
Constructs analysis from string
|
static |
Constructs analysis from VISL-CG string. The string should match what the method printable_vislcg creates plus optional VISL CG 3 trace and such markings.
| def omorfi.analysis.Analysis.get_ftb_feats | ( | self | ) |
Get ftb analyses from token data.
| def omorfi.analysis.Analysis.get_lemmas | ( | self | ) |
Finds lemmas from analyses.
Returns:
list of strings.
| def omorfi.analysis.Analysis.get_moses_factor_segments | ( | self | ) |
Create moses factors from analyses.
| def omorfi.analysis.Analysis.get_segments | ( | self, | |
split_morphs = True, |
|||
split_words = True, |
|||
split_new_words = True, |
|||
split_derivs = False, |
|||
split_nonwords = False |
|||
| ) |
Get specified segments from segmented analysis.
| def omorfi.analysis.Analysis.get_ud_misc | ( | self | ) |
segleft = '' segright = '' if seglen == 0: segleft = '' segright = '' elif seglen == 1: segleft = options.segment_marker segright = options.segment_marker elif seglen % 2 == 0: segleft = options.segment_marker[:int(seglen / 2)] segright = options.segment_marker[int(seglen / 2):] else: segleft = options.segment_marker[:int((seglen - 1) / 2)] segright = options.segment_marker[int((seglen - 1) / 2):] moses = re.sub(r"\|", segleft + "|", moses) moses = re.sub(r" ", " " + segright, moses) last = moses.rfind(segleft + "|") moses = moses[:last + len(segleft) - 1] + moses[last + len(segleft):]
Get a miscellaneous collection of analyses for the token. Primarily used for the UD MISC field but can be used for any extra data.
| def omorfi.analysis.Analysis.get_ufeats | ( | self | ) |
Finds UD Feats from analyses.
Returns:
dict of key value pairs of UD Feat column.
| def omorfi.analysis.Analysis.get_unimorph_feats | ( | self | ) |
Get Unimorph analyses from token data.
| def omorfi.analysis.Analysis.get_upos | ( | self | ) |
Finds UPOS from analyses.
Returns:
upos in a string
| def omorfi.analysis.Analysis.get_vislcg_feats | ( | self | ) |
Get VISL-CG 3 features from analysed token.
| def omorfi.analysis.Analysis.get_xpos_ftb | ( | self | ) |
Gets FTB-compatible part-of-speech from analysis.
| def omorfi.analysis.Analysis.get_xpos_tdt | ( | self | ) |
Get TDT-compatible part-of-speech from analysed token.
| def omorfi.analysis.Analysis.is_oov | ( | self | ) |
Figures out if this analysis was guessed for an OOV.
| def omorfi.analysis.Analysis.printable_ftb_feats | ( | self | ) |
Formats FTB feats from token data like in FTB-2014 data.
| def omorfi.analysis.Analysis.printable_ud_feats | ( | self, | |
hacks = None |
|||
| ) |
Formats UD feats from token data exactly as in fi-tdt data.
When the correct analysis is in question the result should be equal
to the UFEAT field of the CoNLL-U data downloadable from UD web site,
in string format.
Returns:
string of |-separated key=value pairs in correct order or _
| def omorfi.analysis.Analysis.printable_ud_misc | ( | self | ) |
Formats UD misc like in UD data.
| def omorfi.analysis.Analysis.printable_udephead | ( | self | ) |
Format udep head position for CONLL-U.
Returns:
string of non-negative integer or _
| def omorfi.analysis.Analysis.printable_udepname | ( | self | ) |
Format udep as string for CONLL-U.
Returns:
string of udep name
| def omorfi.analysis.Analysis.printable_unimorph | ( | self | ) |
Formats Unimorph feats from token data.
| def omorfi.analysis.Analysis.printable_vislcg | ( | self | ) |
Create VISL-CG 3 output from the token.