Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
N
news
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Sartika Aritonang
news
Commits
96170f4c
Commit
96170f4c
authored
May 29, 2020
by
Sartika Aritonang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Upload New File
parent
d101944f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
164 additions
and
0 deletions
+164
-0
Context.py
stbi/Lib/site-packages/Sastrawi/Stemmer/Context/Context.py
+164
-0
No files found.
stbi/Lib/site-packages/Sastrawi/Stemmer/Context/Context.py
0 → 100644
View file @
96170f4c
from
Sastrawi.Stemmer.ConfixStripping.PrecedenceAdjustmentSpecification
\
import
PrecedenceAdjustmentSpecification
class Context(object):
    """Stemming Context using Nazief and Adriani, CS, ECS, Improved ECS.

    Holds the state of one stemming run: the word as given, the word as
    currently reduced, the dictionary used to recognise root words, the
    visitors that strip affixes, and the list of removals recorded so far.

    Collaborator contracts, as used by this class:
      * ``dictionary.contains(word)`` -> truthy when ``word`` is known.
      * ``visitor_provider.get_visitors()``, ``get_suffix_visitors()`` and
        ``get_prefix_visitors()`` -> iterables of visitors.
      * ``visitor.visit(context)`` -> may change ``context.current_word``,
        call ``context.add_removal(...)`` and/or ``context.stopProcess()``.
      * removal objects expose ``get_subject()``, ``get_removed_part()``,
        ``get_affix_type()`` and a ``result`` attribute.
    """

    def __init__(self, original_word, dictionary, visitor_provider):
        """Store the collaborators and load the visitor lists.

        Args:
            original_word: the word to stem; also the initial current word.
            dictionary: object answering ``contains(word)``.
            visitor_provider: object supplying the three visitor lists.
        """
        self.original_word = original_word
        self.current_word = original_word
        self.dictionary = dictionary
        self.visitor_provider = visitor_provider

        self.process_is_stopped = False
        self.removals = []
        self.visitors = []
        self.suffix_visitors = []
        # NOTE: the attribute name is misspelled ("pisitors"); it is kept
        # as-is because it is publicly readable on the instance.
        self.prefix_pisitors = []
        self.result = ''

        self.init_visitors()

    def init_visitors(self):
        """Fetch the general, suffix and prefix visitors from the provider."""
        self.visitors = self.visitor_provider.get_visitors()
        self.suffix_visitors = self.visitor_provider.get_suffix_visitors()
        self.prefix_pisitors = self.visitor_provider.get_prefix_visitors()

    def stopProcess(self):
        """Flag the run as stopped; visitor loops return once they see it."""
        self.process_is_stopped = True

    def add_removal(self, removal):
        """Append ``removal`` to the list of recorded removals."""
        self.removals.append(removal)

    def execute(self):
        """Execute stemming process; the result can be retrieved with result.

        ``result`` becomes ``current_word`` when the dictionary contains it,
        otherwise it falls back to ``original_word``.
        """
        # step 1 - 5
        self.start_stemming_process()

        # step 6
        if self.dictionary.contains(self.current_word):
            self.result = self.current_word
        else:
            self.result = self.original_word

    def start_stemming_process(self):
        """Run the stripping steps, returning as soon as a known word appears."""
        # step 1
        if self.dictionary.contains(self.current_word):
            return

        self.accept_visitors(self.visitors)
        if self.dictionary.contains(self.current_word):
            return

        precedence_spec = PrecedenceAdjustmentSpecification()

        # Confix Stripping
        # Try to remove prefix before suffix if the specification is met
        if precedence_spec.is_satisfied_by(self.original_word):
            # step 4, 5
            self.remove_prefixes()
            if self.dictionary.contains(self.current_word):
                return

            # step 2, 3
            self.remove_suffixes()
            if self.dictionary.contains(self.current_word):
                return

            # if the trial is failed, restore the original word
            # and continue to normal rule precedence
            # (suffix first, prefix afterwards)
            self.current_word = self.original_word
            self.removals = []

        # step 2, 3
        self.remove_suffixes()
        if self.dictionary.contains(self.current_word):
            return

        # step 4, 5
        self.remove_prefixes()
        if self.dictionary.contains(self.current_word):
            return

        # ECS suffix-restoration loop ("loop pengembalian akhiran")
        self.loop_pengembalian_akhiran()

    def remove_prefixes(self):
        """Apply the prefix visitors up to three times, stopping on a known word."""
        for _ in range(3):
            self.accept_prefix_visitors(self.prefix_pisitors)
            if self.dictionary.contains(self.current_word):
                return

    def remove_suffixes(self):
        """Apply the suffix visitors once."""
        self.accept_visitors(self.suffix_visitors)

    def accept(self, visitor):
        """Let ``visitor`` operate on this context."""
        visitor.visit(self)

    def accept_visitors(self, visitors):
        """Apply ``visitors`` in order until a known word or a stop request.

        Returns:
            ``current_word`` when the loop ends early (dictionary hit or
            ``process_is_stopped``); ``None`` when every visitor ran.
        """
        for visitor in visitors:
            self.accept(visitor)
            if self.dictionary.contains(self.current_word):
                return self.current_word
            if self.process_is_stopped:
                return self.current_word

    def accept_prefix_visitors(self, visitors):
        """Apply prefix ``visitors`` until one of them records a removal.

        Returns:
            ``current_word`` on a dictionary hit or a stop request; ``None``
            when a visitor added a removal or when every visitor ran.
        """
        removal_count = len(self.removals)
        for visitor in visitors:
            self.accept(visitor)
            if self.dictionary.contains(self.current_word):
                return self.current_word
            if self.process_is_stopped:
                return self.current_word
            # A visitor recorded a removal: end this pass so the caller
            # (remove_prefixes) can start the next one.
            if len(self.removals) > removal_count:
                return

    def loop_pengembalian_akhiran(self):
        """ECS suffix-restoration loop ("loop pengembalian akhiran").

        After prefixes are restored, walk the recorded suffix removals from
        last to first. For each one, put the suffix back (for 'kan', first
        try putting back only 'k') and retry prefix removal. Return as soon
        as the dictionary contains ``current_word``; otherwise roll the
        context back to its state before the trial.
        """
        self.restore_prefix()

        # Take a real copy: remove_prefixes() may append to self.removals,
        # and an alias would make the rollback below a no-op.
        saved_removals = list(self.removals)
        saved_word = self.current_word

        for removal in reversed(saved_removals):
            if not self.is_suffix_removal(removal):
                continue

            if removal.get_removed_part() == 'kan':
                self.current_word = removal.result + 'k'

                # step 4, 5
                self.remove_prefixes()
                if self.dictionary.contains(self.current_word):
                    return
                self.current_word = removal.result + 'kan'
            else:
                self.current_word = removal.get_subject()

            # step 4, 5
            self.remove_prefixes()
            if self.dictionary.contains(self.current_word):
                return

            # Trial failed: discard anything recorded during it.
            self.removals = list(saved_removals)
            self.current_word = saved_word

    def is_suffix_removal(self, removal):
        """Check whether the removed part is a suffix.

        A removal counts as a suffix removal when its affix type is one of
        'DS', 'PP' or 'P'.
        """
        return removal.get_affix_type() in ('DS', 'PP', 'P')

    def restore_prefix(self):
        """Restore prefix to proceed with the ECS suffix-restoration loop.

        Resets ``current_word`` to the subject of the first recorded removal
        (if any), then drops every removal whose affix type is 'DP'.
        """
        # NOTE(review): the inherited comment here spoke of "the subject of
        # first prefix removal", but the code uses the first removal of any
        # type - confirm which is intended.
        if self.removals:
            self.current_word = self.removals[0].get_subject()

        # Filter in one pass; removing items while iterating the same list
        # skips the element after each removal and leaves 'DP' entries
        # behind. Slice assignment keeps the same list object.
        self.removals[:] = [
            removal for removal in self.removals
            if removal.get_affix_type() != 'DP'
        ]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment