Find Cheaper University Textbooks
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 
 

121 lines
3.2 KiB

from .utils import DslBase
__all__ = [
'tokenizer', 'analyzer', 'char_filter', 'token_filter'
]
class AnalysisBase(object):
@classmethod
def _type_shortcut(cls, name_or_instance, type=None, **kwargs):
if isinstance(name_or_instance, cls):
if type or kwargs:
raise ValueError('%s() cannot accept parameters.' % cls.__name__)
return name_or_instance
if not (type or kwargs):
return cls.get_dsl_class('builtin')(name_or_instance)
return cls.get_dsl_class('custom')(name_or_instance, type or 'custom', **kwargs)
class CustomAnalysis(object):
name = 'custom'
def __init__(self, name, builtin_type='custom', **kwargs):
self._builtin_type = builtin_type
self._name = name
super(CustomAnalysis, self).__init__(**kwargs)
def to_dict(self):
# only name to present in lists
return self._name
def get_definition(self):
d = super(CustomAnalysis, self).to_dict()
d = d.pop(self.name)
d['type'] = self._builtin_type
return d
class BuiltinAnalysis(object):
name = 'builtin'
def __init__(self, name):
self._name = name
super(BuiltinAnalysis, self).__init__()
def to_dict(self):
# only name to present in lists
return self._name
class Analyzer(AnalysisBase, DslBase):
_type_name = 'analyzer'
name = None
class BuiltinAnalyzer(BuiltinAnalysis, Analyzer):
def get_analysis_definition(self):
return {}
class CustomAnalyzer(CustomAnalysis, Analyzer):
_param_defs = {
'filter': {'type': 'token_filter', 'multi': True},
'char_filter': {'type': 'char_filter', 'multi': True},
'tokenizer': {'type': 'tokenizer'},
}
def get_analysis_definition(self):
out = {'analyzer': {self._name: self.get_definition()}}
t = getattr(self, 'tokenizer', None)
if hasattr(t, 'get_definition'):
out['tokenizer'] = {t._name: t.get_definition()}
filters = dict((f._name, f.get_definition())
for f in self.filter if hasattr(f, 'get_definition'))
if filters:
out['filter'] = filters
char_filters = dict((f._name, f.get_definition())
for f in self.char_filter if hasattr(f, 'get_definition'))
if char_filters:
out['char_filter'] = char_filters
return out
class Tokenizer(AnalysisBase, DslBase):
_type_name = 'tokenizer'
name = None
class BuiltinTokenizer(BuiltinAnalysis, Tokenizer):
pass
class CustomTokenizer(CustomAnalysis, Tokenizer):
pass
class TokenFilter(AnalysisBase, DslBase):
_type_name = 'token_filter'
name = None
class BuiltinTokenFilter(BuiltinAnalysis, TokenFilter):
pass
class CustomTokenFilter(CustomAnalysis, TokenFilter):
pass
class CharFilter(AnalysisBase, DslBase):
_type_name = 'char_filter'
name = None
class BuiltinCharFilter(BuiltinAnalysis, CharFilter):
pass
class CustomCharFilter(CustomAnalysis, CharFilter):
pass
# shortcuts for direct use
analyzer = Analyzer._type_shortcut
tokenizer = Tokenizer._type_shortcut
token_filter = TokenFilter._type_shortcut
char_filter = CharFilter._type_shortcut