omorfi 0.9.9
Open morphology of Finnish
Public Member Functions | Static Public Member Functions | Data Fields
omorfi.analysis.Analysis Class Reference

Public Member Functions

def __init__ (self)
 
def __str__ (self)
 
def get_upos (self)
 
def get_lemmas (self)
 
def get_ufeats (self)
 
def get_ftb_feats (self)
 
def get_unimorph_feats (self)
 
def get_vislcg_feats (self)
 
def get_segments (self, split_morphs=True, split_words=True, split_new_words=True, split_derivs=False, split_nonwords=False)
 
def get_moses_factor_segments (self)
 
def get_ud_misc (self)
 segleft = '' segright = '' if seglen == 0: segleft = '' segright = '' elif seglen == 1: segleft = options.segment_marker segright = options.segment_marker elif seglen % 2 == 0: segleft = options.segment_marker[:int(seglen / 2)] segright = options.segment_marker[int(seglen / 2):] else: segleft = options.segment_marker[:int((seglen - 1) / 2)] segright = options.segment_marker[int((seglen - 1) / 2):] moses = re.sub(r"\|", segleft + "|", moses) moses = re.sub(r" ", " " + segright, moses) last = moses.rfind(segleft + "|") moses = moses[:last + len(segleft) - 1] + moses[last + len(segleft):] More...
 
def printable_ud_misc (self)
 
def printable_udepname (self)
 
def printable_udephead (self)
 
def printable_ud_feats (self, hacks=None)
 
def printable_unimorph (self)
 
def printable_ftb_feats (self)
 
def get_xpos_ftb (self)
 
def get_xpos_tdt (self)
 
def printable_vislcg (self)
 
def is_oov (self)
 

Static Public Member Functions

def fromstr (str s)
 
def fromomor (str s, weight=float("inf"), hacks=None)
 
def fromvislcg (s)
 

Data Fields

 raw
 
 upos
 
 ufeats
 
 udepname
 
 udeppos
 
 misc
 
 rawtype
 
 weight
 
 analsurf
 
 manglers
 
 lemmas
 

Detailed Description

Contains a single analysis of a token.

An analysis is a hypothesis about what some of a token's features may be:
a morphological analysis contains morphosyntactic readings, and a segmentation
contains segment markers.

Constructor & Destructor Documentation

◆ __init__()

def omorfi.analysis.Analysis.__init__ (   self)
Create an empty analysis

@param raw  analysis in string form
@param weight  penalty weight for analysis

Member Function Documentation

◆ fromomor()

def omorfi.analysis.Analysis.fromomor ( str  s,
  weight = float("inf"),
  hacks = None 
)
static
Constructs analysis from Omor style string.

Typically used to create an analysis from libhfst string and weight
after using omorfi HFST analyser on a surface string.

Args:
    s       An omor-style analysis string, e.g.
        "[WORD_ID=äh][UPOS=INTJ]"
    weight  A penalty-weight of the analysis
    hacks   Used for mangling some values based on some standards and
        treebanks

Returns:
    a token with omor analysis parsed into structured information

◆ fromstr()

def omorfi.analysis.Analysis.fromstr ( str  s)
static
Constructs analysis from string

◆ fromvislcg()

def omorfi.analysis.Analysis.fromvislcg (   s)
static
Constructs analysis from VISL-CG string.

The string should match what the method printable_vislcg creates plus
optional VISL CG 3 trace and such markings.

◆ get_ftb_feats()

def omorfi.analysis.Analysis.get_ftb_feats (   self)
Get ftb analyses from token data.

◆ get_lemmas()

def omorfi.analysis.Analysis.get_lemmas (   self)
Finds lemmas from analyses.

Returns:
    list of strings.

◆ get_moses_factor_segments()

def omorfi.analysis.Analysis.get_moses_factor_segments (   self)
Create moses factors from analyses.

◆ get_segments()

def omorfi.analysis.Analysis.get_segments (   self,
  split_morphs = True,
  split_words = True,
  split_new_words = True,
  split_derivs = False,
  split_nonwords = False 
)
Get specified segments from segmented analysis.

◆ get_ud_misc()

def omorfi.analysis.Analysis.get_ud_misc (   self)

segleft = ''
segright = ''
if seglen == 0:
    segleft = ''
    segright = ''
elif seglen == 1:
    segleft = options.segment_marker
    segright = options.segment_marker
elif seglen % 2 == 0:
    segleft = options.segment_marker[:int(seglen / 2)]
    segright = options.segment_marker[int(seglen / 2):]
else:
    segleft = options.segment_marker[:int((seglen - 1) / 2)]
    segright = options.segment_marker[int((seglen - 1) / 2):]
moses = re.sub(r"\|", segleft + "|", moses)
moses = re.sub(r" ", " " + segright, moses)
last = moses.rfind(segleft + "|")
moses = moses[:last + len(segleft) - 1] + moses[last + len(segleft):]

Get random collection of analyses for token.

Primarily used for UD MISC field but can be used for any extra data.

◆ get_ufeats()

def omorfi.analysis.Analysis.get_ufeats (   self)
Finds UD Feats from analyses.

Returns:
    dict of key value pairs of UD Feat column.

◆ get_unimorph_feats()

def omorfi.analysis.Analysis.get_unimorph_feats (   self)
Get Unimorph analyses from token data.

◆ get_upos()

def omorfi.analysis.Analysis.get_upos (   self)
Finds UPOS from analyses.

Returns:
    upos in a string

◆ get_vislcg_feats()

def omorfi.analysis.Analysis.get_vislcg_feats (   self)
Get VISL-CG 3 features from analysed token.

◆ get_xpos_ftb()

def omorfi.analysis.Analysis.get_xpos_ftb (   self)
Gets FTB-compatible part-of-speech from analysis.

◆ get_xpos_tdt()

def omorfi.analysis.Analysis.get_xpos_tdt (   self)
Get TDT-compatible part-of-speech from analysed token.

◆ is_oov()

def omorfi.analysis.Analysis.is_oov (   self)
Figures out if this analysis was guessed for an OOV.

◆ printable_ftb_feats()

def omorfi.analysis.Analysis.printable_ftb_feats (   self)
Formats FTB feats from token data like in FTB-2014 data.

◆ printable_ud_feats()

def omorfi.analysis.Analysis.printable_ud_feats (   self,
  hacks = None 
)
Formats UD feats from token data exactly as in fi-tdt data.

When the correct analysis is in question the result should be equal
to the UFEAT field of the CoNLL-U data downloadable from the UD web site,
in string format.

Returns:
    string of |-separated key=value pairs in correct order or _

◆ printable_ud_misc()

def omorfi.analysis.Analysis.printable_ud_misc (   self)
Formats UD misc like in UD data.

◆ printable_udephead()

def omorfi.analysis.Analysis.printable_udephead (   self)
Format udep head position for CONLL-U.

Returns:
    string of non-negative integer or _

◆ printable_udepname()

def omorfi.analysis.Analysis.printable_udepname (   self)
Format udep as string for CONLL-U.

Returns:
    string of udep name

◆ printable_unimorph()

def omorfi.analysis.Analysis.printable_unimorph (   self)
Formats Unimorph feats from token data.

◆ printable_vislcg()

def omorfi.analysis.Analysis.printable_vislcg (   self)
Create VISL-CG 3 output from the token.

The documentation for this class was generated from the following file: