![]() |
omorfi 0.9.9
Open morphology of Finnish
|
Public Member Functions | |
def | __init__ (self) |
def | __str__ (self) |
def | get_upos (self) |
def | get_lemmas (self) |
def | get_ufeats (self) |
def | get_ftb_feats (self) |
def | get_unimorph_feats (self) |
def | get_vislcg_feats (self) |
def | get_segments (self, split_morphs=True, split_words=True, split_new_words=True, split_derivs=False, split_nonwords=False) |
def | get_moses_factor_segments (self) |
def | get_ud_misc (self) |
segleft = '' segright = '' if seglen == 0: segleft = '' segright = '' elif seglen == 1: segleft = options.segment_marker segright = options.segment_marker elif seglen % 2 == 0: segleft = options.segment_marker[:int(seglen / 2)] segright = options.segment_marker[int(seglen / 2):] else: segleft = options.segment_marker[:int((seglen - 1) / 2)] segright = options.segment_marker[int((seglen - 1) / 2):] moses = re.sub(r"\|", segleft + "|", moses) moses = re.sub(r" ", " " + segright, moses) last = moses.rfind(segleft + "|") moses = moses[:last + len(segleft) - 1] + moses[last + len(segleft):] More... | |
def | printable_ud_misc (self) |
def | printable_udepname (self) |
def | printable_udephead (self) |
def | printable_ud_feats (self, hacks=None) |
def | printable_unimorph (self) |
def | printable_ftb_feats (self) |
def | get_xpos_ftb (self) |
def | get_xpos_tdt (self) |
def | printable_vislcg (self) |
def | is_oov (self) |
Static Public Member Functions | |
def | fromstr (str s) |
def | fromomor (str s, weight=float("inf"), hacks=None) |
def | fromvislcg (s) |
Data Fields | |
raw | |
upos | |
ufeats | |
udepname | |
udeppos | |
misc | |
rawtype | |
weight | |
analsurf | |
manglers | |
lemmas | |
Contains a single analysis of a token. An analysis is a hypothesis of what some of the token's features may be: a morphological analysis contains morphosyntactic readings, and a segmentation contains segment markers.
def omorfi.analysis.Analysis.__init__ | ( | self | ) |
Create an empty analysis @param raw analysis in string form @param weight penalty weight for analysis
|
static |
Constructs analysis from Omor style string. Typically used to create an analysis from libhfst string and weight after using omorfi HFST analyser on a surface string. Args: s An omor-style analysis string, e.g. "[WORD_ID=äh][UPOS=INTJ]" weight A penalty-weight of the analysis hacks Used for mangling some values based on some standards and treebanks Returns: a token with omor analysis parsed into structured information
|
static |
Constructs analysis from string
|
static |
Constructs analysis from VISL-CG string. The string should match what the method printable_vislcg creates plus optional VISL CG 3 trace and such markings.
def omorfi.analysis.Analysis.get_ftb_feats | ( | self | ) |
Get ftb analyses from token data.
def omorfi.analysis.Analysis.get_lemmas | ( | self | ) |
Finds lemmas from analyses. Returns: list of strings.
def omorfi.analysis.Analysis.get_moses_factor_segments | ( | self | ) |
Create moses factors from analyses.
def omorfi.analysis.Analysis.get_segments | ( | self, | |
split_morphs = True , |
|||
split_words = True , |
|||
split_new_words = True , |
|||
split_derivs = False , |
|||
split_nonwords = False |
|||
) |
Get specified segments from segmented analysis.
def omorfi.analysis.Analysis.get_ud_misc | ( | self | ) |
segleft = '' segright = '' if seglen == 0: segleft = '' segright = '' elif seglen == 1: segleft = options.segment_marker segright = options.segment_marker elif seglen % 2 == 0: segleft = options.segment_marker[:int(seglen / 2)] segright = options.segment_marker[int(seglen / 2):] else: segleft = options.segment_marker[:int((seglen - 1) / 2)] segright = options.segment_marker[int((seglen - 1) / 2):] moses = re.sub(r"\|", segleft + "|", moses) moses = re.sub(r" ", " " + segright, moses) last = moses.rfind(segleft + "|") moses = moses[:last + len(segleft) - 1] + moses[last + len(segleft):]
Get random collection of analyses for token. Primarily used for UD MISC field but can be used for any extra data.
def omorfi.analysis.Analysis.get_ufeats | ( | self | ) |
Finds UD Feats from analyses. Returns: dict of key value pairs of UD Feat column.
def omorfi.analysis.Analysis.get_unimorph_feats | ( | self | ) |
Get Unimorph analyses from token data.
def omorfi.analysis.Analysis.get_upos | ( | self | ) |
Finds UPOS from analyses. Returns: upos in a string
def omorfi.analysis.Analysis.get_vislcg_feats | ( | self | ) |
Get VISL-CG 3 features from analysed token.
def omorfi.analysis.Analysis.get_xpos_ftb | ( | self | ) |
Gets FTB-compatible part-of-speech from analysis.
def omorfi.analysis.Analysis.get_xpos_tdt | ( | self | ) |
Get TDT-compatible part-of-speech from analysed token.
def omorfi.analysis.Analysis.is_oov | ( | self | ) |
Figures out if this analysis was guessed for an OOV.
def omorfi.analysis.Analysis.printable_ftb_feats | ( | self | ) |
Formats FTB feats from token data like in FTB-2014 data.
def omorfi.analysis.Analysis.printable_ud_feats | ( | self, | |
hacks = None |
|||
) |
Formats UD feats from token data exactly as in fi-tdt data. When the correct analysis is in question the result should be equal to the UFEAT field of the CoNLL-U data downloadable from the UD web site, in string format. Returns: string of |-separated key=value pairs in correct order or _
def omorfi.analysis.Analysis.printable_ud_misc | ( | self | ) |
Formats UD misc like in UD data.
def omorfi.analysis.Analysis.printable_udephead | ( | self | ) |
Format udep head position for CONLL-U. Returns: string of non-negative integer or _
def omorfi.analysis.Analysis.printable_udepname | ( | self | ) |
Format udep as string for CONLL-U. Returns: string of udep name
def omorfi.analysis.Analysis.printable_unimorph | ( | self | ) |
Formats Unimorph feats from token data.
def omorfi.analysis.Analysis.printable_vislcg | ( | self | ) |
Create VISL-CG 3 output from the token.