Commit 96170f4c by Sartika Aritonang

Upload New File

parent d101944f
from Sastrawi.Stemmer.ConfixStripping.PrecedenceAdjustmentSpecification \
import PrecedenceAdjustmentSpecification
class Context(object):
"""Stemming Context using Nazief and Adriani, CS, ECS, Improved ECS"""
def __init__(self, original_word, dictionary, visitor_provider):
self.original_word = original_word
self.current_word = original_word
self.dictionary = dictionary
self.visitor_provider = visitor_provider
self.process_is_stopped = False
self.removals = []
self.visitors = []
self.suffix_visitors = []
self.prefix_pisitors = []
self.result = ''
self.init_visitors()
def init_visitors(self):
self.visitors = self.visitor_provider.get_visitors()
self.suffix_visitors = self.visitor_provider.get_suffix_visitors()
self.prefix_pisitors = self.visitor_provider.get_prefix_visitors()
def stopProcess(self):
self.process_is_stopped = True
def add_removal(self, removal):
self.removals.append(removal)
def execute(self):
"""Execute stemming process; the result can be retrieved with result"""
#step 1 - 5
self.start_stemming_process()
#step 6
if self.dictionary.contains(self.current_word):
self.result = self.current_word
else:
self.result = self.original_word
def start_stemming_process(self):
#step 1
if self.dictionary.contains(self.current_word):
return
self.accept_visitors(self.visitors)
if self.dictionary.contains(self.current_word):
return
csPrecedenceAdjustmentSpecification = PrecedenceAdjustmentSpecification()
#Confix Stripping
#Try to remove prefix before suffix if the specification is met
if csPrecedenceAdjustmentSpecification.is_satisfied_by(self.original_word):
#step 4, 5
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return
#step 2, 3
self.remove_suffixes()
if self.dictionary.contains(self.current_word):
return
else:
#if the trial is failed, restore the original word
#and continue to normal rule precedence (suffix first, prefix afterwards)
self.current_word = self.original_word
self.removals = []
#step 2, 3
self.remove_suffixes()
if self.dictionary.contains(self.current_word):
return
#step 4, 5
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return
#ECS loop pengembalian akhiran
self.loop_pengembalian_akhiran()
def remove_prefixes(self):
for i in range(3):
self.accept_prefix_visitors(self.prefix_pisitors)
if self.dictionary.contains(self.current_word):
return
def remove_suffixes(self):
self.accept_visitors(self.suffix_visitors)
def accept(self, visitor):
visitor.visit(self)
def accept_visitors(self, visitors):
for visitor in visitors:
self.accept(visitor)
if self.dictionary.contains(self.current_word):
return self.current_word
if self.process_is_stopped:
return self.current_word
def accept_prefix_visitors(self, visitors):
removalCount = len(self.removals)
for visitor in visitors:
self.accept(visitor)
if self.dictionary.contains(self.current_word):
return self.current_word
if self.process_is_stopped:
return self.current_word
if len(self.removals) > removalCount:
return
def loop_pengembalian_akhiran(self):
"""ECS Loop Pengembalian Akhiran"""
self.restore_prefix()
removals = self.removals
reversed_removals = reversed(removals)
current_word = self.current_word
for removal in reversed_removals:
if not self.is_suffix_removal(removal):
continue
if removal.get_removed_part() == 'kan':
self.current_word = removal.result + 'k'
#step 4,5
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return
self.current_word = removal.result + 'kan'
else:
self.current_word = removal.get_subject()
#step 4,5
self.remove_prefixes()
if self.dictionary.contains(self.current_word):
return
self.removals = removals
self.current_word = current_word
def is_suffix_removal(self, removal):
"""Check wether the removed part is a suffix"""
return removal.get_affix_type() == 'DS' \
or removal.get_affix_type() == 'PP' \
or removal.get_affix_type() == 'P'
def restore_prefix(self):
"""Restore prefix to proceed with ECS loop pengembalian akhiran"""
for removal in self.removals:
#return the word before precoding (the subject of first prefix removal)
self.current_word = removal.get_subject()
break
for removal in self.removals:
if removal.get_affix_type() == 'DP':
self.removals.remove(removal)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment