Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
S
SearchEngine
Project
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Rosa Delima Mendrofa
SearchEngine
Commits
b3b033bf
Commit
b3b033bf
authored
May 29, 2020
by
Yolanda Nainggolan
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add phrase search
parent
8f565798
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
353 additions
and
33 deletions
+353
-33
views.cpython-37.pyc
...e/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
+0
-0
dataset_STBI.xml
SearchEngine/InvertedIndexSimulator/data/dataset_STBI.xml
+0
-0
main.cpython-37.pyc
...edIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
+0
-0
main.py
SearchEngine/InvertedIndexSimulator/inverted/main.py
+109
-1
dataframe.min.css
...nvertedIndexSimulator/static/assets/css/dataframe.min.css
+0
-13
dataframe.html
...gine/InvertedIndexSimulator/templates/apps/dataframe.html
+11
-8
home.html
SearchEngine/InvertedIndexSimulator/templates/apps/home.html
+3
-4
index.html
...chEngine/InvertedIndexSimulator/templates/apps/index.html
+2
-2
indexing.html
...ngine/InvertedIndexSimulator/templates/apps/indexing.html
+11
-2
phrases.html
...Engine/InvertedIndexSimulator/templates/apps/phrases.html
+45
-0
preprocessing.html
.../InvertedIndexSimulator/templates/apps/preprocessing.html
+9
-0
preprocessing2.html
...InvertedIndexSimulator/templates/apps/preprocessing2.html
+9
-0
preprocessing3.html
...InvertedIndexSimulator/templates/apps/preprocessing3.html
+9
-0
preprocessing4.html
...InvertedIndexSimulator/templates/apps/preprocessing4.html
+10
-1
resultphrase.html
...e/InvertedIndexSimulator/templates/apps/resultphrase.html
+56
-0
search.html
...hEngine/InvertedIndexSimulator/templates/apps/search.html
+51
-0
urls.py
SearchEngine/InvertedIndexSimulator/urls.py
+4
-0
views.py
SearchEngine/InvertedIndexSimulator/views.py
+21
-2
urls.cpython-37.pyc
SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc
+0
-0
urls.py
SearchEngine/SearchEngine/urls.py
+3
-0
No files found.
SearchEngine/InvertedIndexSimulator/__pycache__/views.cpython-37.pyc
View file @
b3b033bf
No preview for this file type
SearchEngine/InvertedIndexSimulator/data/dataset_STBI.xml
View file @
b3b033bf
This diff is collapsed.
Click to expand it.
SearchEngine/InvertedIndexSimulator/inverted/__pycache__/main.cpython-37.pyc
View file @
b3b033bf
No preview for this file type
SearchEngine/InvertedIndexSimulator/inverted/main.py
View file @
b3b033bf
...
...
@@ -187,7 +187,7 @@ def stemming(tokens):
return
tokens
def
searching
(
dcmnt_xml
,
query
):
def
proximity
(
dcmnt_xml
,
query
):
all_doc_no
=
dcmnt_xml
.
getElementsByTagName
(
'DOCNO'
)
all_song
=
dcmnt_xml
.
getElementsByTagName
(
'SONG'
)
...
...
@@ -322,3 +322,110 @@ def detail(id):
return
lyrics
,
judul
def _first_text(node):
    """Return the data of *node*'s first child, or '' when the element is empty."""
    child = node.firstChild
    return child.data if child is not None else ''


def phrase(dcmnt_xml, query):
    """Return the songs whose preprocessed title and lyrics contain *query*.

    For every document the SONG and LYRICS text are joined and run through
    this module's preprocessing helpers (``remove_punc_tokenize``,
    ``to_lower``, ``stop_word_token``, a digit filter and ``stemming``).
    Adjacent tokens are then joined with ``_`` to form the document's
    bi-grams, and the document matches when *query* is one of them.

    Args:
        dcmnt_xml: Parsed XML document exposing ``getElementsByTagName``.
            Its DOCNO, SONG and LYRICS elements are paired by position.
        query: Bi-gram to look for, written as ``word1_word2``. It is
            compared verbatim against the generated bi-grams.

    Returns:
        dict mapping the integer DOCNO of each matching document to its
        song title, in document order. Empty when nothing matches.
    """
    all_doc_no = dcmnt_xml.getElementsByTagName('DOCNO')
    all_song = dcmnt_xml.getElementsByTagName('SONG')
    all_lyrics = dcmnt_xml.getElementsByTagName('LYRICS')

    res = {}
    for doc_no, song, lyrics in zip(all_doc_no, all_song, all_lyrics):
        title = _first_text(song)

        tokens = remove_punc_tokenize(title + ' ' + _first_text(lyrics))
        tokens = to_lower(tokens)
        tokens = stop_word_token(tokens)
        # Drop every token that contains a digit.
        tokens = [w for w in tokens if not any(ch.isdigit() for ch in w)]
        tokens = stemming(tokens)

        # Exact bi-gram membership: a substring test on the joined bi-gram
        # text would also report e.g. "art_is" inside "heart_is".
        bigrams = {
            first + '_' + second
            for first, second in zip(tokens, tokens[1:])
        }
        if query in bigrams:
            # The title comes from the document that was passed in, so the
            # result does not depend on DOCNO values being 1..N in file order.
            res[int(_first_text(doc_no))] = title

    return res
\ No newline at end of file
SearchEngine/InvertedIndexSimulator/static/assets/css/dataframe.min.css
View file @
b3b033bf
@import
url('https://fonts.googleapis.com/css?family=Quicksand:400,700&display=swap')
;
body
{
font-family
:
sans-serif
;
}
h2
,
h3
{
color
:
#00a2c6
}
...
...
@@ -55,13 +50,6 @@ footer {
border-radius
:
15px
;
padding
:
20px
;
margin-top
:
10px
;
width
:
100%
;
}
table
{
table-layout
:
fixed
;
border
:
1px
solid
black
;
width
:
100px
;
}
.jumbotron
{
...
...
@@ -101,7 +89,6 @@ main {
}
#content
{
width
:
100%
;
height
:
100%
;
}
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/dataframe.html
View file @
b3b033bf
...
...
@@ -6,6 +6,15 @@
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
</head>
<style>
table
{
border-collapse
:
collapse
;
}
table
,
td
,
th
{
border
:
1px
solid
black
;
}
</style>
<body>
<main>
...
...
@@ -20,21 +29,15 @@
</div>
</div>
<center><
h1>
Dataset
</h1><br></center
>
<center><
p
style=
"font-size:40px;"
><strong>
Data
</strong></p
>
<table>
<tr>
<th>
DOCNO
</th>
<th>
ARTIST
</th>
<th>
SONG
</th>
<th>
LYRICS
</th>
</tr>
{% for i in
DOCNO
%}
{% for i in
LYRICS
%}
<tr>
<td>
{{ i }}
</td>
<td>
{{ j }}
</td>
<td>
{{ k }}
</td>
<td>
{{ l }}
</td>
</tr>
{% endfor %}
</table>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/home.html
View file @
b3b033bf
...
...
@@ -19,11 +19,10 @@
<main>
<div
id=
"content"
>
<article
class=
"card"
>
<center><h1>
Pilih
Dataset
</h1><br>
<center><h1>
Pilih
Metode Searching
</h1><br>
<table>
<tr>
<th><button
onclick=
"pageRedirect()"
class=
"button"
style=
"vertical-align:middle"
><span>
International Billboard Song
</span></button></th>
<td><button
class=
"button"
style=
"vertical-align:middle"
><span>
Indonesian Song
</span></button></td>
<td><button
onclick=
"data()"
class=
"button"
style=
"vertical-align:middle"
><span>
Mulai
</span></button></td>
</tr>
</table>
</center>
...
...
@@ -39,7 +38,7 @@
</body>
<script>
function
pageRedirect
()
{
function
data
()
{
window
.
location
.
href
=
"/dataframe"
;
}
</script>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/index.html
View file @
b3b033bf
...
...
@@ -16,8 +16,8 @@
</div>
</div>
<div
class=
"row"
>
<center><h1
style=
"font-size:45px"
>
Searching!
<br></h1>
<p
style=
"font-size:20px"
><strong>
Silahkan masukkan lirik dari lagu yang ingin Anda temukan
</strong></p>
<center><h1
style=
"font-size:45px"
>
Proximity Search
<br></h1>
<p
style=
"font-size:20px"
><strong>
Silahkan masukkan
satu kata dalam
lirik dari lagu yang ingin Anda temukan
</strong></p>
<form
method=
"POST"
action=
"/result/"
>
{% csrf_token %}
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/indexing.html
View file @
b3b033bf
...
...
@@ -6,6 +6,15 @@
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/trying.min.css"
rel=
"stylesheet"
>
</head>
<style>
table
{
border-collapse
:
collapse
;
}
table
,
td
,
th
{
border
:
1px
solid
black
;
}
</style>
<body>
<main>
...
...
@@ -21,7 +30,7 @@
</div>
<center><p
style=
"font-size:40px;"
><strong>
Indexing
</strong></p>
<table
width=
"100%"
;
border=
"1px solid black"
>
<table>
<tr>
<th>
Token
</th>
<th>
Index
</th>
...
...
@@ -47,7 +56,7 @@
<script>
function
pageRedirect_prev
()
{
window
.
location
.
href
=
"/
preprocessing4
"
;
window
.
location
.
href
=
"/
search
"
;
}
function
pageRedirect_next
()
{
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/phrases.html
0 → 100644
View file @
b3b033bf
<!DOCTYPE html>
<html
lang=
"en"
>
<head>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
</head>
<body>
<main>
<div
id=
"content"
>
<article
class=
"card"
>
<div>
<div>
<button
onclick=
"pageRedirect_prev()"
class=
"button"
style=
"vertical-align:middle"
><span>
Previous
</span></button>
</div>
</div>
<div
class=
"row"
>
<center><h1
style=
"font-size:45px"
>
Phrase Search
<br></h1>
<p
style=
"font-size:20px"
><strong>
Silahkan masukkan dua kata dalam lirik dari lagu yang ingin Anda temukan (hubungkan dengan
<i>
underscore
</i>
)
</strong></p>
<p>
*Misalnya "sugar_pie"
</p>
<form
method=
"POST"
action=
"/resultphrase/"
>
{% csrf_token %}
<div
class=
"form-row"
>
<input
type=
"text"
name=
"querysearch"
placeholder=
"Masukkan Query Anda..."
>
<br>
<button
type=
"submit"
>
Cari!
</button>
</div>
</form>
</div>
</center>
</article>
</div>
</main>
</body>
<script>
// "Previous" button handler: navigate back to the search-method page.
function pageRedirect_prev() {
    var target = "/search";
    window.location.assign(target);
}
</script>
</html>
SearchEngine/InvertedIndexSimulator/templates/apps/preprocessing.html
View file @
b3b033bf
...
...
@@ -6,6 +6,15 @@
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
</head>
<style>
table
{
border-collapse
:
collapse
;
}
table
,
td
,
th
{
border
:
1px
solid
black
;
}
</style>
<body>
<main>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/preprocessing2.html
View file @
b3b033bf
...
...
@@ -6,6 +6,15 @@
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
</head>
<style>
table
{
border-collapse
:
collapse
;
}
table
,
td
,
th
{
border
:
1px
solid
black
;
}
</style>
<body>
<main>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/preprocessing3.html
View file @
b3b033bf
...
...
@@ -6,6 +6,15 @@
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
</head>
<style>
table
{
border-collapse
:
collapse
;
}
table
,
td
,
th
{
border
:
1px
solid
black
;
}
</style>
<body>
<main>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/preprocessing4.html
View file @
b3b033bf
...
...
@@ -6,6 +6,15 @@
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/dataframe.min.css"
rel=
"stylesheet"
>
</head>
<style>
table
{
border-collapse
:
collapse
;
}
table
,
td
,
th
{
border
:
1px
solid
black
;
}
</style>
<body>
<main>
...
...
@@ -48,7 +57,7 @@
}
function
pageRedirect_next
()
{
window
.
location
.
href
=
"/
indexing
"
;
window
.
location
.
href
=
"/
search
"
;
}
</script>
...
...
SearchEngine/InvertedIndexSimulator/templates/apps/resultphrase.html
0 → 100644
View file @
b3b033bf
<!DOCTYPE html>
<html
lang=
"en"
>
<head>
<meta
charset=
"utf-8"
>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1, shrink-to-fit=no"
>
<meta
name=
"description"
content=
""
>
<meta
name=
"author"
content=
""
>
<title>
Inverted Index
</title>
<!-- Bootstrap core CSS -->
<link
href=
"../../static/assets/vendor/bootstrap/css/bootstrap.min.css"
rel=
"stylesheet"
>
<!-- Custom fonts for this template -->
<link
href=
"../../static/assets/vendor/fontawesome-free/css/all.min.css"
rel=
"stylesheet"
>
<link
href=
"../../static/assets/vendor/simple-line-icons/css/simple-line-icons.css"
rel=
"stylesheet"
type=
"text/css"
>
<link
href=
"https://fonts.googleapis.com/css?family=Lato:300,400,700,300italic,400italic,700italic"
rel=
"stylesheet"
type=
"text/css"
>
<!-- Custom styles for this template -->
<link
href=
"../../static/assets/css/landing-page.min.css"
rel=
"stylesheet"
>
</head>
<body>
<nav
class=
"navbar navbar-light bg-light static-top"
>
<div
class=
"container"
>
<a
class=
"navbar-brand"
href=
"/"
>
Search Simulator
</a>
</div>
</nav>
<section
class=
"testimonials text-center bg-light"
>
<div
class=
"container"
>
<h2
class=
"mb-5"
>
Lagu yang sesuai dengan query "{{ query }}"
</h2>
<div
class=
"row"
>
{% for key, values in res.items %}
<div
class=
"col-lg-4"
>
<div
class=
"testimonial-item mx-auto mb-5 mb-lg-0"
>
<h5><a
href=
"/lyric/{{ key }}"
>
Lagu No: {{ key }}
</a></h5>
<h5>
"{{ values }}"
</h5>
</div>
</div>
{% endfor %}
</div>
</div>
</section>
<!-- Bootstrap core JavaScript -->
<script
src=
"../../static/assets/vendor/jquery/jquery.min.js"
></script>
<script
src=
"../../static/assets/vendor/bootstrap/js/bootstrap.bundle.min.js"
></script>
</body>
</html>
SearchEngine/InvertedIndexSimulator/templates/apps/search.html
0 → 100644
View file @
b3b033bf
<!DOCTYPE html>
<html
lang=
"en"
>
<head>
<meta
name=
"viewport"
content=
"width=device-width, initial-scale=1"
>
<title>
Song Lyric Search Engine
</title>
<link
href=
"../../static/assets/css/trying.min.css"
rel=
"stylesheet"
>
</head>
<body>
<main>
<div
id=
"content"
>
<article
class=
"card"
>
<div>
<div>
<button
onclick=
"pageRedirect_prev()"
class=
"button"
style=
"vertical-align:middle"
><span>
Previous
</span></button>
</div>
</div>
<div
class=
"row"
>
<center><h1
style=
"font-size:45px"
>
Searching!
<br></h1>
<p
style=
"font-size:20px"
><strong>
Silahkan pilih metode searching yang anda inginkan
</strong></p>
<table>
<tr>
<th><button
onclick=
"proximity()"
class=
"button"
style=
"vertical-align:middle"
><span>
Proximity Search
</span></button></th>
<td><button
onclick=
"phrase()"
class=
"button"
style=
"vertical-align:middle"
><span>
Phrase Search
</span></button></td>
</tr>
</table>
</div>
</center>
</article>
</div>
</main>
</body>
<script>
// "Previous" button handler: navigate back to the last preprocessing step.
// The path must match the "preprocessing4/" route; the earlier spelling
// "/preproseccing4" pointed at a URL that is not routed.
function pageRedirect_prev() {
    window.location.href = "/preprocessing4";
}
// "Proximity Search" button handler: navigate to the indexing page.
function proximity() {
    var target = "/indexing";
    window.location.assign(target);
}
// "Phrase Search" button handler: navigate to the phrase-search form.
function phrase() {
    var target = "/phrases";
    window.location.assign(target);
}
</script>
</html>
SearchEngine/InvertedIndexSimulator/urls.py
View file @
b3b033bf
...
...
@@ -14,7 +14,10 @@ urlpatterns = [
path
(
'preprocessing3/'
,
views
.
preprocessing3
),
path
(
'preprocessing4/'
,
views
.
preprocessing4
),
path
(
'indexing/'
,
views
.
indexing
),
path
(
'search/'
,
views
.
search
),
path
(
'index/'
,
views
.
index
),
path
(
'phrase/'
,
views
.
phrase
),
path
(
'result/'
,
views
.
result
),
path
(
'resultphrases/'
,
views
.
resultphrases
),
path
(
'lyric/<int:id>'
,
views
.
lyric
,
name
=
'lyric'
),
]
\ No newline at end of file
SearchEngine/InvertedIndexSimulator/views.py
View file @
b3b033bf
...
...
@@ -19,7 +19,6 @@ except ImportError: # not 2.6+ or is 3.x
except
ImportError
:
pass
def home(request):
    """Render the ``apps/home.html`` template for *request*."""
    template = 'apps/home.html'
    return render(request, template)
...
...
@@ -146,12 +145,18 @@ def indexing(request):
def index(request):
    """Render the ``apps/index.html`` template for *request*."""
    template = 'apps/index.html'
    return render(request, template)
def phrases(request):
    """Render the ``apps/phrases.html`` template for *request*."""
    template = 'apps/phrases.html'
    return render(request, template)
def search(request):
    """Render the ``apps/search.html`` template for *request*."""
    template = 'apps/search.html'
    return render(request, template)
def
result
(
request
):
dcmnt_xml
=
minidom
.
parse
(
"InvertedIndexSimulator/data/dataset_STBI.xml"
)
if
request
.
method
==
'POST'
:
query
=
request
.
POST
[
'querysearch'
]
res
=
main
.
searching
(
dcmnt_xml
,
query
)
res
=
main
.
proximity
(
dcmnt_xml
,
query
)
content
=
{
'res'
:
res
,
...
...
@@ -160,6 +165,20 @@ def result(request):
return
render
(
request
,
'apps/result.html'
,
content
)
def resultphrase(request):
    """Run a phrase search for the submitted query and render the results.

    On POST, the ``querysearch`` form field is passed to ``main.phrase``
    together with the parsed dataset, and ``apps/resultphrase.html`` is
    rendered with ``res`` (the search result) and ``query``.

    Any other request method renders the phrase-search form instead, so
    opening the URL directly never leaves the view without a response.
    """
    if request.method != 'POST':
        # Nothing was submitted: show the form rather than failing.
        return render(request, 'apps/phrases.html')

    # .get() avoids an exception when the field is missing from the form data.
    query = request.POST.get('querysearch', '')

    # Parse the dataset only when a search is actually going to run.
    dcmnt_xml = minidom.parse("InvertedIndexSimulator/data/dataset_STBI.xml")
    try:
        res = main.phrase(dcmnt_xml, query)
    except KeyError:
        # main.phrase looks the query up in its bi-gram index with a plain
        # subscript, so a phrase that occurs in no song raises KeyError.
        # Treat that as "no results" instead of a server error.
        res = {}

    content = {'res': res, 'query': query}
    return render(request, 'apps/resultphrase.html', content)
def
lyric
(
request
,
id
):
lyrics
,
judul
=
main
.
detail
(
id
)
...
...
SearchEngine/SearchEngine/__pycache__/urls.cpython-37.pyc
View file @
b3b033bf
No preview for this file type
SearchEngine/SearchEngine/urls.py
View file @
b3b033bf
...
...
@@ -26,8 +26,11 @@ urlpatterns = [
path
(
'preprocessing3/'
,
views
.
preprocessing3
),
path
(
'preprocessing4/'
,
views
.
preprocessing4
),
path
(
'indexing/'
,
views
.
indexing
),
path
(
'search/'
,
views
.
search
),
path
(
'index/'
,
views
.
index
),
path
(
'phrases/'
,
views
.
phrases
),
path
(
'result/'
,
views
.
result
),
path
(
'resultphrase/'
,
views
.
resultphrase
),
path
(
'lyric/<int:id>'
,
views
.
lyric
,
name
=
'lyric'
),
]
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment