Upload New File

parent 33b95a84
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"# Load the product catalogue; the first CSV column becomes the row index\n",
"data = pd.read_csv(\"men-products.csv\", delimiter=',', index_col=0)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Int64Index: 61456 entries, 1 to 61503\n",
"Data columns (total 10 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 NAME 61455 non-null object\n",
" 1 CATEGORY 61456 non-null object\n",
" 2 DESCRIPTION & COLOR 61456 non-null object\n",
" 3 FABRIC 56623 non-null object\n",
" 4 IMAGE 61456 non-null object\n",
" 5 SIZE 57618 non-null object\n",
" 6 PRICE 61456 non-null object\n",
" 7 PRODUCT ID 61456 non-null int64 \n",
" 8 WEBSITE 61456 non-null object\n",
" 9 PRODUCT URL 61456 non-null object\n",
"dtypes: int64(1), object(9)\n",
"memory usage: 5.2+ MB\n"
]
}
],
"source": [
"# Summary of column dtypes, non-null counts and memory usage\n",
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>total_missing</th>\n",
" <th>percent_missing</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>NAME</th>\n",
" <td>1</td>\n",
" <td>0.005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>CATEGORY</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>DESCRIPTION &amp; COLOR</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>FABRIC</th>\n",
" <td>4833</td>\n",
" <td>24.165</td>\n",
" </tr>\n",
" <tr>\n",
" <th>IMAGE</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>SIZE</th>\n",
" <td>3838</td>\n",
" <td>19.190</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRICE</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRODUCT ID</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>WEBSITE</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>PRODUCT URL</th>\n",
" <td>0</td>\n",
" <td>0.000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" total_missing percent_missing\n",
"NAME 1 0.005\n",
"CATEGORY 0 0.000\n",
"DESCRIPTION & COLOR 0 0.000\n",
"FABRIC 4833 24.165\n",
"IMAGE 0 0.000\n",
"SIZE 3838 19.190\n",
"PRICE 0 0.000\n",
"PRODUCT ID 0 0.000\n",
"WEBSITE 0 0.000\n",
"PRODUCT URL 0 0.000"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Missing values per column; percentages are relative to the frame's actual\n",
"# row count rather than a hard-coded total\n",
"missing_count = data.isnull().sum()\n",
"missing_data = pd.DataFrame({\n",
"    'total_missing': missing_count,\n",
"    'percent_missing': missing_count / len(data) * 100,\n",
"})\n",
"missing_data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Count rows that repeat an earlier row across all columns\n",
"data.duplicated().sum()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# Drop these columns in place; NAME, CATEGORY and 'DESCRIPTION & COLOR' remain\n",
"data.drop(\n",
"    columns=['FABRIC', 'IMAGE', 'SIZE', 'WEBSITE', 'PRODUCT URL', 'PRICE', 'PRODUCT ID'],\n",
"    inplace=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Shorten the 'DESCRIPTION & COLOR' column name to 'DESCRIPTION' (in place)\n",
"data.rename(columns = {'DESCRIPTION & COLOR':'DESCRIPTION'}, inplace = True) "
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# Work on a random sample of 10,000 rows; random_state=1 makes the draw repeatable\n",
"data1 = data.sample(10000, random_state=1).copy()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Rename the product categories: raw slug -> display label\n",
"# NOTE(review): 'Accesories' looks like a misspelling of 'Accessories'; confirm\n",
"# nothing else matches on this exact label before correcting it\n",
"category_labels = {\n",
"    'accessories': 'Accesories',\n",
"    'casual-shirts': 'Casual Shirts',\n",
"    'Men-Casual-Trousers': 'Men Casual Trousers',\n",
"    'formal-shirts': 'Formal Shirts',\n",
"    'Men-Formal-Trousers': 'Men Formal Trousers',\n",
"    'men-jackets-coats': 'Men Jackets Coats',\n",
"    'men-swimwear': 'Men Swimwear',\n",
"    'men-suits': 'Men Suits',\n",
"}\n",
"data1.replace({'CATEGORY': category_labels}, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>NAME</th>\n",
" <th>CATEGORY</th>\n",
" <th>DESCRIPTION</th>\n",
" </tr>\n",
" <tr>\n",
" <th>SERIAL NO</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>25553</th>\n",
" <td>Fort Collins Men Red Solid Padded Jacket</td>\n",
" <td>Men Jackets Coats</td>\n",
" <td>Fort Collins Men Red Solid Padded Jacket, For...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18640</th>\n",
" <td>MANGO MAN Men Navy Blue Tailored Slim Fit Soli...</td>\n",
" <td>Men Formal Trousers</td>\n",
" <td>MANGO MAN Men Navy Blue Tailored Slim Fit Soli...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18543</th>\n",
" <td>Arrow Men Navy Blue Tapered Fit Checked Formal...</td>\n",
" <td>Men Formal Trousers</td>\n",
" <td>Arrow Men Navy Blue Tapered Fit Checked Formal...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21475</th>\n",
" <td>Hanes Charcoal Grey Thermal T-Shirt</td>\n",
" <td>Innerwear &amp; Sleapwear</td>\n",
" <td>Hanes Charcoal Grey Thermal T Shirt, Hanes, T...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14859</th>\n",
" <td>Hancock Men Blue Regular Fit Striped Formal Shirt</td>\n",
" <td>Formal Shirts</td>\n",
" <td>Hancock Men Blue Regular Fit Striped Formal Sh...</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" NAME \\\n",
"SERIAL NO \n",
"25553 Fort Collins Men Red Solid Padded Jacket \n",
"18640 MANGO MAN Men Navy Blue Tailored Slim Fit Soli... \n",
"18543 Arrow Men Navy Blue Tapered Fit Checked Formal... \n",
"21475 Hanes Charcoal Grey Thermal T-Shirt \n",
"14859 Hancock Men Blue Regular Fit Striped Formal Shirt \n",
"\n",
" CATEGORY \\\n",
"SERIAL NO \n",
"25553 Men Jackets Coats \n",
"18640 Men Formal Trousers \n",
"18543 Men Formal Trousers \n",
"21475 Innerwear & Sleapwear \n",
"14859 Formal Shirts \n",
"\n",
" DESCRIPTION \n",
"SERIAL NO \n",
"25553 Fort Collins Men Red Solid Padded Jacket, For... \n",
"18640 MANGO MAN Men Navy Blue Tailored Slim Fit Soli... \n",
"18543 Arrow Men Navy Blue Tapered Fit Checked Formal... \n",
"21475 Hanes Charcoal Grey Thermal T Shirt, Hanes, T... \n",
"14859 Hancock Men Blue Regular Fit Striped Formal Sh... "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the first five sampled rows\n",
"data1.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# Descriptions as a Series re-indexed from 0; the original row index is discarded\n",
"text_total = data1[\"DESCRIPTION\"].reset_index(drop=True)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 Fort Collins Men Red Solid Padded Jacket, For...\n",
"1 MANGO MAN Men Navy Blue Tailored Slim Fit Soli...\n",
"2 Arrow Men Navy Blue Tapered Fit Checked Formal...\n",
"3 Hanes Charcoal Grey Thermal T Shirt, Hanes, T...\n",
"4 Hancock Men Blue Regular Fit Striped Formal Sh...\n",
"Name: DESCRIPTION, dtype: object\n"
]
}
],
"source": [
"# Show the first five descriptions of the re-indexed Series\n",
"print(text_total.head())"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"from nltk.corpus import stopwords\n",
"import pandas as pd\n",
"from nltk.stem import PorterStemmer\n",
"from nltk.tokenize import sent_tokenize, word_tokenize\n",
"\n",
"# Built once when this cell runs instead of on every call: loading the NLTK\n",
"# stop-word list and constructing the stemmer do not depend on the input text.\n",
"_NON_LETTERS = re.compile(\"[^a-zA-Z]\")\n",
"_STOPWORD_SET = frozenset(stopwords.words(\"english\"))\n",
"_STEMMER = PorterStemmer()\n",
"\n",
"\n",
"def review_to_words(raw_text):\n",
"    \"\"\"Normalise one text into a space-separated string of stemmed words.\n",
"\n",
"    Steps: replace every character that is not an ASCII letter with a space,\n",
"    lower-case, split on whitespace, drop English stop words, then apply the\n",
"    Porter stemmer to each remaining word.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    raw_text : str\n",
"        Text to clean.\n",
"\n",
"    Returns\n",
"    -------\n",
"    str\n",
"        Stemmed words joined by single spaces (empty if nothing remains).\n",
"    \"\"\"\n",
"    # keep only letters, then lower-case and split into words\n",
"    words = _NON_LETTERS.sub(\" \", raw_text).lower().split()\n",
"\n",
"    # stop words are filtered on the unstemmed form, then survivors are stemmed\n",
"    stemmed_words = [_STEMMER.stem(word) for word in words if word not in _STOPWORD_SET]\n",
"\n",
"    return \" \".join(stemmed_words)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['fort collin men red solid pad jacket fort collin jacket topwear apparel apparel men buy fort collin men red solid pad jacket onlin india buy jacket best price', 'mango man men navi blue tailor slim fit solid formal trouser mango man trouser bottomwear apparel apparel men buy mango man men navi blue tailor slim fit solid formal trouser onlin india buy trouser best price', 'arrow men navi blue taper fit check formal trouser arrow trouser bottomwear apparel apparel men buy arrow men navi blue taper fit check formal trouser onlin india buy trouser best price', 'hane charcoal grey thermal shirt hane thermal top innerwear apparel apparel men buy hane charcoal grey thermal shirt onlin india buy thermal top best price', 'hancock men blue regular fit stripe formal shirt hancock shirt topwear apparel apparel men buy hancock men blue regular fit stripe formal shirt onlin india buy shirt best price']\n"
]
}
],
"source": [
"# Clean every product description in text_total with review_to_words;\n",
"# str() coerces any non-string value to text before cleaning\n",
"processed_wmn = [review_to_words(str(text)) for text in text_total]\n",
"print(processed_wmn[:5])"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Flatten the cleaned descriptions into one long word list. Description\n",
"# boundaries are not kept, so anything built from adjacent words will also\n",
"# pair the last word of one description with the first word of the next.\n",
"processed_words = [word for cleaned_text in processed_wmn for word in cleaned_text.split()]"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"def transform_sentences_to_pair_of_words(sentences):\n",
"    \"\"\"Return every pair of adjacent items in `sentences`, in order.\n",
"\n",
"    Despite the parameter name, the items are whatever the sequence holds;\n",
"    given a flat list of words the result is the list of word bigrams.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    sentences : sequence\n",
"        Sliceable sequence of items (e.g. a list of words).\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of tuple\n",
"        [(s[0], s[1]), (s[1], s[2]), ...]; empty when there are fewer than\n",
"        two items.\n",
"    \"\"\"\n",
"    # zip stops at the shorter argument, so empty and single-item inputs\n",
"    # yield [] without any special-casing or wrap-around pair to remove.\n",
"    return list(zip(sentences, sentences[1:]))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"# Build the adjacent-word pairs for the whole flattened word list\n",
"sentences_to_pair = transform_sentences_to_pair_of_words(processed_words)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('fort', 'collin'),\n",
" ('collin', 'men'),\n",
" ('men', 'red'),\n",
" ('red', 'solid'),\n",
" ('solid', 'pad'),\n",
" ('pad', 'jacket'),\n",
" ('jacket', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'jacket'),\n",
" ('jacket', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'men'),\n",
" ('men', 'red'),\n",
" ('red', 'solid'),\n",
" ('solid', 'pad'),\n",
" ('pad', 'jacket'),\n",
" ('jacket', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jacket'),\n",
" ('jacket', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'mango'),\n",
" ('mango', 'man'),\n",
" ('man', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'tailor'),\n",
" ('tailor', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'mango'),\n",
" ('mango', 'man'),\n",
" ('man', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'mango'),\n",
" ('mango', 'man'),\n",
" ('man', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'tailor'),\n",
" ('tailor', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'arrow'),\n",
" ('arrow', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'taper'),\n",
" ('taper', 'fit'),\n",
" ('fit', 'check'),\n",
" ('check', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'arrow'),\n",
" ('arrow', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'arrow'),\n",
" ('arrow', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'taper'),\n",
" ('taper', 'fit'),\n",
" ('fit', 'check'),\n",
" ('check', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'hane'),\n",
" ('hane', 'charcoal'),\n",
" ('charcoal', 'grey'),\n",
" ('grey', 'thermal'),\n",
" ('thermal', 'shirt'),\n",
" ('shirt', 'hane'),\n",
" ('hane', 'thermal'),\n",
" ('thermal', 'top'),\n",
" ('top', 'innerwear'),\n",
" ('innerwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'hane'),\n",
" ('hane', 'charcoal'),\n",
" ('charcoal', 'grey'),\n",
" ('grey', 'thermal'),\n",
" ('thermal', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'thermal'),\n",
" ('thermal', 'top'),\n",
" ('top', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'hancock'),\n",
" ('hancock', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'stripe'),\n",
" ('stripe', 'formal'),\n",
" ('formal', 'shirt'),\n",
" ('shirt', 'hancock'),\n",
" ('hancock', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'hancock'),\n",
" ('hancock', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'stripe'),\n",
" ('stripe', 'formal'),\n",
" ('formal', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'tantra'),\n",
" ('tantra', 'men'),\n",
" ('men', 'black'),\n",
" ('black', 'print'),\n",
" ('print', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'tantra'),\n",
" ('tantra', 'tshirt'),\n",
" ('tshirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'tantra'),\n",
" ('tantra', 'men'),\n",
" ('men', 'black'),\n",
" ('black', 'print'),\n",
" ('print', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'tshirt'),\n",
" ('tshirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'aeropostal'),\n",
" ('aeropostal', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'mildli'),\n",
" ('mildli', 'distress'),\n",
" ('distress', 'jean'),\n",
" ('jean', 'aeropostal'),\n",
" ('aeropostal', 'jean'),\n",
" ('jean', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'aeropostal'),\n",
" ('aeropostal', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'mildli'),\n",
" ('mildli', 'distress'),\n",
" ('distress', 'jean'),\n",
" ('jean', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jean'),\n",
" ('jean', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'ether'),\n",
" ('ether', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'anti'),\n",
" ('anti', 'microbi'),\n",
" ('microbi', 'cotton'),\n",
" ('cotton', 'linen'),\n",
" ('linen', 'shirt'),\n",
" ('shirt', 'ether'),\n",
" ('ether', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'ether'),\n",
" ('ether', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'anti'),\n",
" ('anti', 'microbi'),\n",
" ('microbi', 'cotton'),\n",
" ('cotton', 'linen'),\n",
" ('linen', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'roadster'),\n",
" ('roadster', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'clean'),\n",
" ('clean', 'look'),\n",
" ('look', 'jean'),\n",
" ('jean', 'roadster'),\n",
" ('roadster', 'jean'),\n",
" ('jean', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'roadster'),\n",
" ('roadster', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'clean'),\n",
" ('clean', 'look'),\n",
" ('look', 'jean'),\n",
" ('jean', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jean'),\n",
" ('jean', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'dollar'),\n",
" ('dollar', 'bigboss'),\n",
" ('bigboss', 'pack'),\n",
" ('pack', 'trunk'),\n",
" ('trunk', 'mdtr'),\n",
" ('mdtr', 'po'),\n",
" ('po', 'dollar'),\n",
" ('dollar', 'bigboss'),\n",
" ('bigboss', 'trunk'),\n",
" ('trunk', 'innerwear'),\n",
" ('innerwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'dollar'),\n",
" ('dollar', 'bigboss'),\n",
" ('bigboss', 'pack'),\n",
" ('pack', 'trunk'),\n",
" ('trunk', 'mdtr'),\n",
" ('mdtr', 'po'),\n",
" ('po', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trunk'),\n",
" ('trunk', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'moda'),\n",
" ('moda', 'rapido'),\n",
" ('rapido', 'men'),\n",
" ('men', 'black'),\n",
" ('black', 'print'),\n",
" ('print', 'polo'),\n",
" ('polo', 'collar'),\n",
" ('collar', 'shirt'),\n",
" ('shirt', 'moda'),\n",
" ('moda', 'rapido'),\n",
" ('rapido', 'tshirt'),\n",
" ('tshirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'moda'),\n",
" ('moda', 'rapido'),\n",
" ('rapido', 'men'),\n",
" ('men', 'black'),\n",
" ('black', 'print'),\n",
" ('print', 'polo'),\n",
" ('polo', 'collar'),\n",
" ('collar', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'tshirt'),\n",
" ('tshirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'loui'),\n",
" ('loui', 'philipp'),\n",
" ('philipp', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'self'),\n",
" ('self', 'design'),\n",
" ('design', 'formal'),\n",
" ('formal', 'shirt'),\n",
" ('shirt', 'loui'),\n",
" ('loui', 'philipp'),\n",
" ('philipp', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'loui'),\n",
" ('loui', 'philipp'),\n",
" ('philipp', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'self'),\n",
" ('self', 'design'),\n",
" ('design', 'formal'),\n",
" ('formal', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'light'),\n",
" ('light', 'blue'),\n",
" ('blue', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'skinni'),\n",
" ('skinni', 'fit'),\n",
" ('fit', 'jean'),\n",
" ('jean', 'calvin'),\n",
" ('calvin', 'klein'),\n",
" ('klein', 'jean'),\n",
" ('jean', 'jean'),\n",
" ('jean', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'light'),\n",
" ('light', 'blue'),\n",
" ('blue', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'skinni'),\n",
" ('skinni', 'fit'),\n",
" ('fit', 'jean'),\n",
" ('jean', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jean'),\n",
" ('jean', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'highland'),\n",
" ('highland', 'men'),\n",
" ('men', 'oliv'),\n",
" ('oliv', 'green'),\n",
" ('green', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'camouflag'),\n",
" ('camouflag', 'print'),\n",
" ('print', 'casual'),\n",
" ('casual', 'shirt'),\n",
" ('shirt', 'highland'),\n",
" ('highland', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'highland'),\n",
" ('highland', 'men'),\n",
" ('men', 'oliv'),\n",
" ('oliv', 'green'),\n",
" ('green', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'camouflag'),\n",
" ('camouflag', 'print'),\n",
" ('print', 'casual'),\n",
" ('casual', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'u'),\n",
" ('u', 'polo'),\n",
" ('polo', 'assn'),\n",
" ('assn', 'denim'),\n",
" ('denim', 'co'),\n",
" ('co', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'print'),\n",
" ('print', 'casual'),\n",
" ('casual', 'shirt'),\n",
" ('shirt', 'u'),\n",
" ('u', 'polo'),\n",
" ('polo', 'assn'),\n",
" ('assn', 'denim'),\n",
" ('denim', 'co'),\n",
" ('co', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'u'),\n",
" ('u', 'polo'),\n",
" ('polo', 'assn'),\n",
" ('assn', 'denim'),\n",
" ('denim', 'co'),\n",
" ('co', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'print'),\n",
" ('print', 'casual'),\n",
" ('casual', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'levi'),\n",
" ('levi', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'casual'),\n",
" ('casual', 'shirt'),\n",
" ('shirt', 'levi'),\n",
" ('levi', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'levi'),\n",
" ('levi', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'casual'),\n",
" ('casual', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'loui'),\n",
" ('loui', 'philipp'),\n",
" ('philipp', 'sport'),\n",
" ('sport', 'men'),\n",
" ('men', 'charcoal'),\n",
" ('charcoal', 'grey'),\n",
" ('grey', 'solid'),\n",
" ('solid', 'tailor'),\n",
" ('tailor', 'jacket'),\n",
" ('jacket', 'loui'),\n",
" ('loui', 'philipp'),\n",
" ('philipp', 'sport'),\n",
" ('sport', 'jacket'),\n",
" ('jacket', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'loui'),\n",
" ('loui', 'philipp'),\n",
" ('philipp', 'sport'),\n",
" ('sport', 'men'),\n",
" ('men', 'charcoal'),\n",
" ('charcoal', 'grey'),\n",
" ('grey', 'solid'),\n",
" ('solid', 'tailor'),\n",
" ('tailor', 'jacket'),\n",
" ('jacket', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jacket'),\n",
" ('jacket', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'killer'),\n",
" ('killer', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'clean'),\n",
" ('clean', 'look'),\n",
" ('look', 'jean'),\n",
" ('jean', 'killer'),\n",
" ('killer', 'jean'),\n",
" ('jean', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'killer'),\n",
" ('killer', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'clean'),\n",
" ('clean', 'look'),\n",
" ('look', 'jean'),\n",
" ('jean', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jean'),\n",
" ('jean', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'peter'),\n",
" ('peter', 'england'),\n",
" ('england', 'casual'),\n",
" ('casual', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'regular'),\n",
" ('regular', 'trouser'),\n",
" ('trouser', 'peter'),\n",
" ('peter', 'england'),\n",
" ('england', 'casual'),\n",
" ('casual', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'peter'),\n",
" ('peter', 'england'),\n",
" ('england', 'casual'),\n",
" ('casual', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'regular'),\n",
" ('regular', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'arrow'),\n",
" ('arrow', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'taper'),\n",
" ('taper', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'arrow'),\n",
" ('arrow', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'arrow'),\n",
" ('arrow', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'taper'),\n",
" ('taper', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'v'),\n",
" ('v', 'dot'),\n",
" ('dot', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'self'),\n",
" ('self', 'design'),\n",
" ('design', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'v'),\n",
" ('v', 'dot'),\n",
" ('dot', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'v'),\n",
" ('v', 'dot'),\n",
" ('dot', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'self'),\n",
" ('self', 'design'),\n",
" ('design', 'formal'),\n",
" ('formal', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'gespo'),\n",
" ('gespo', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'print'),\n",
" ('print', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'gespo'),\n",
" ('gespo', 'tshirt'),\n",
" ('tshirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'gespo'),\n",
" ('gespo', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'print'),\n",
" ('print', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'tshirt'),\n",
" ('tshirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'smag'),\n",
" ('smag', 'men'),\n",
" ('men', 'mustard'),\n",
" ('mustard', 'solid'),\n",
" ('solid', 'lightweight'),\n",
" ('lightweight', 'tailor'),\n",
" ('tailor', 'jacket'),\n",
" ('jacket', 'smag'),\n",
" ('smag', 'jacket'),\n",
" ('jacket', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'smag'),\n",
" ('smag', 'men'),\n",
" ('men', 'mustard'),\n",
" ('mustard', 'solid'),\n",
" ('solid', 'lightweight'),\n",
" ('lightweight', 'tailor'),\n",
" ('tailor', 'jacket'),\n",
" ('jacket', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jacket'),\n",
" ('jacket', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'jack'),\n",
" ('jack', 'jone'),\n",
" ('jone', 'men'),\n",
" ('men', 'black'),\n",
" ('black', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'regular'),\n",
" ('regular', 'trouser'),\n",
" ('trouser', 'jack'),\n",
" ('jack', 'jone'),\n",
" ('jone', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'jack'),\n",
" ('jack', 'jone'),\n",
" ('jone', 'men'),\n",
" ('men', 'black'),\n",
" ('black', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'regular'),\n",
" ('regular', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'van'),\n",
" ('van', 'heusen'),\n",
" ('heusen', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'formal'),\n",
" ('formal', 'shirt'),\n",
" ('shirt', 'van'),\n",
" ('van', 'heusen'),\n",
" ('heusen', 'shirt'),\n",
" ('shirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'van'),\n",
" ('van', 'heusen'),\n",
" ('heusen', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'regular'),\n",
" ('regular', 'fit'),\n",
" ('fit', 'solid'),\n",
" ('solid', 'formal'),\n",
" ('formal', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'shirt'),\n",
" ('shirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'maniac'),\n",
" ('maniac', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'solid'),\n",
" ('solid', 'v'),\n",
" ('v', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'maniac'),\n",
" ('maniac', 'tshirt'),\n",
" ('tshirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'maniac'),\n",
" ('maniac', 'men'),\n",
" ('men', 'grey'),\n",
" ('grey', 'solid'),\n",
" ('solid', 'v'),\n",
" ('v', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'tshirt'),\n",
" ('tshirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'clean'),\n",
" ('clean', 'look'),\n",
" ('look', 'stretchabl'),\n",
" ('stretchabl', 'crop'),\n",
" ('crop', 'jean'),\n",
" ('jean', 'jean'),\n",
" ('jean', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'men'),\n",
" ('men', 'blue'),\n",
" ('blue', 'slim'),\n",
" ('slim', 'fit'),\n",
" ('fit', 'mid'),\n",
" ('mid', 'rise'),\n",
" ('rise', 'clean'),\n",
" ('clean', 'look'),\n",
" ('look', 'stretchabl'),\n",
" ('stretchabl', 'crop'),\n",
" ('crop', 'jean'),\n",
" ('jean', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jean'),\n",
" ('jean', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'blackberri'),\n",
" ('blackberri', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'print'),\n",
" ('print', 'casual'),\n",
" ('casual', 'trouser'),\n",
" ('trouser', 'blackberri'),\n",
" ('blackberri', 'trouser'),\n",
" ('trouser', 'bottomwear'),\n",
" ('bottomwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'blackberri'),\n",
" ('blackberri', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'blue'),\n",
" ('blue', 'print'),\n",
" ('print', 'casual'),\n",
" ('casual', 'trouser'),\n",
" ('trouser', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'trouser'),\n",
" ('trouser', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'moda'),\n",
" ('moda', 'rapido'),\n",
" ('rapido', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'print'),\n",
" ('print', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'longlin'),\n",
" ('longlin', 'shirt'),\n",
" ('shirt', 'moda'),\n",
" ('moda', 'rapido'),\n",
" ('rapido', 'tshirt'),\n",
" ('tshirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'moda'),\n",
" ('moda', 'rapido'),\n",
" ('rapido', 'men'),\n",
" ('men', 'white'),\n",
" ('white', 'print'),\n",
" ('print', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'longlin'),\n",
" ('longlin', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'tshirt'),\n",
" ('tshirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'men'),\n",
" ('men', 'tan'),\n",
" ('tan', 'brown'),\n",
" ('brown', 'solid'),\n",
" ('solid', 'biker'),\n",
" ('biker', 'jacket'),\n",
" ('jacket', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'jacket'),\n",
" ('jacket', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'men'),\n",
" ('men', 'tan'),\n",
" ('tan', 'brown'),\n",
" ('brown', 'solid'),\n",
" ('solid', 'biker'),\n",
" ('biker', 'jacket'),\n",
" ('jacket', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jacket'),\n",
" ('jacket', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'men'),\n",
" ('men', 'rust'),\n",
" ('rust', 'brown'),\n",
" ('brown', 'solid'),\n",
" ('solid', 'biker'),\n",
" ('biker', 'jacket'),\n",
" ('jacket', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'jacket'),\n",
" ('jacket', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'fort'),\n",
" ('fort', 'collin'),\n",
" ('collin', 'men'),\n",
" ('men', 'rust'),\n",
" ('rust', 'brown'),\n",
" ('brown', 'solid'),\n",
" ('solid', 'biker'),\n",
" ('biker', 'jacket'),\n",
" ('jacket', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'jacket'),\n",
" ('jacket', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'esprit'),\n",
" ('esprit', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'white'),\n",
" ('white', 'stripe'),\n",
" ('stripe', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'esprit'),\n",
" ('esprit', 'tshirt'),\n",
" ('tshirt', 'topwear'),\n",
" ('topwear', 'apparel'),\n",
" ('apparel', 'apparel'),\n",
" ('apparel', 'men'),\n",
" ('men', 'buy'),\n",
" ('buy', 'esprit'),\n",
" ('esprit', 'men'),\n",
" ('men', 'navi'),\n",
" ('navi', 'white'),\n",
" ('white', 'stripe'),\n",
" ('stripe', 'round'),\n",
" ('round', 'neck'),\n",
" ('neck', 'shirt'),\n",
" ('shirt', 'onlin'),\n",
" ('onlin', 'india'),\n",
" ('india', 'buy'),\n",
" ('buy', 'tshirt'),\n",
" ('tshirt', 'best'),\n",
" ('best', 'price'),\n",
" ('price', 'u'),\n",
" ('u', 'polo'),\n",
" ('polo', 'assn'),\n",
" ('assn', 'men'),\n",
" ('men', 'oliv'),\n",
" ('oliv', 'green'),\n",
" ('green', 'regular'),\n",
" ...]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sentences_to_pair"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"import networkx as nx\n",
"\n",
"# Token adjacency is used without direction, so build the undirected graph\n",
"# directly instead of filling a DiGraph and copying it with to_undirected().\n",
"# Repeated pairs collapse into a single unweighted edge either way.\n",
"G = nx.Graph()\n",
"G.add_edges_from(sentences_to_pair)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'networkx.classes.graph.Graph'>\n"
]
}
],
"source": [
"# Confirm that G is an undirected networkx Graph before it is embedded.\n",
"print(G.__class__)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Computing transition probabilities: 100%|█████████████████████████████████████████| 1508/1508 [00:05<00:00, 292.23it/s]\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model trained\n"
]
}
],
"source": [
"from node2vec import Node2Vec\n",
"\n",
"# Precompute the random-walk transition probabilities over G.\n",
"node2vec = Node2Vec(G, dimensions=20, walk_length=16, num_walks=100, workers=2)\n",
"# fit() passes its keyword arguments on to gensim's Word2Vec trainer.\n",
"model = node2vec.fit(window=10, min_count=1)\n",
"# Nothing is written to disk in this cell, so report training rather than saving.\n",
"print('Model trained')"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"caffein\n",
"retro\n",
"transpar\n",
"junior\n",
"rubber\n",
"pace\n",
"assassin\n",
"emoji\n"
]
}
],
"source": [
"# Query through model.wv: calling most_similar on the model itself is deprecated in gensim.\n",
"for node, _ in model.wv.most_similar('black'):\n",
"    # Hide tokens of three characters or fewer.\n",
"    if len(node) > 3:\n",
"        print(node)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"tshirt\n",
"proplanet\n",
"signatur\n",
"alci\n",
"price\n",
"bonati\n",
"greenturn\n",
"nautica\n",
"fritzberg\n"
]
}
],
"source": [
"# Query through model.wv: calling most_similar on the model itself is deprecated in gensim.\n",
"for node, _ in model.wv.most_similar('men'):\n",
"    # Hide tokens of three characters or fewer.\n",
"    if len(node) > 3:\n",
"        print(node)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"price\n",
"dennison\n",
"oxolloxo\n",
"southbay\n",
"ivoc\n",
"excalibur\n",
"rigo\n",
"smokestack\n",
"zeal\n"
]
}
],
"source": [
"# Query through model.wv: calling most_similar on the model itself is deprecated in gensim.\n",
"for node, _ in model.wv.most_similar('buy'):\n",
"    # Hide tokens of three characters or fewer.\n",
"    if len(node) > 3:\n",
"        print(node)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"bottomwear\n",
"alvaro\n",
"loungewear\n",
"apparel\n",
"headwear\n",
"peter\n",
"nightwear\n",
"pacif\n"
]
}
],
"source": [
"w1 = \"best\"\n",
"# Ask for the ten nearest tokens to w1; positive is given in its documented list form.\n",
"for node, _ in model.wv.most_similar(positive=[w1], topn=10):\n",
"    # Hide tokens of three characters or fewer.\n",
"    if len(node) > 3:\n",
"        print(node)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Tokens in this vocabulary are stemmed (e.g. 'jean', not 'jeans'), and gensim\n",
"# ignores words it has no vector for in doesnt_match, so use the stemmed form.\n",
"# NOTE(review): re-run this cell; the previously recorded result came from the old query.\n",
"print(model.wv.doesnt_match(\"men black jean\".split()))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('rubber', 1.6524951457977295),\n",
" ('pro', 1.6506301164627075),\n",
" ('cncpt', 1.6447763442993164),\n",
" ('nsw', 1.6380010843276978),\n",
" ('brt', 1.5377882719039917),\n",
" ('biofus', 1.5276793241500854),\n",
" ('text', 1.4686168432235718),\n",
" ('shimmer', 1.4551149606704712),\n",
" ('gsw', 1.4540029764175415),\n",
" ('floyd', 1.447547435760498)]"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Analogy-style query: tokens close to 'woman' and 'black' but far from 'man'.\n",
"model.wv.most_similar_cosmul(\n",
"    positive=['woman', 'black'],\n",
"    negative=['man'],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"1.0"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Sanity check: a token compared with itself should score 1.0.\n",
"model.wv.similarity('black', 'black')"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"0.38277367"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Similarity score between the embeddings of two colour tokens.\n",
"model.wv.similarity('black', 'brown')"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'white'"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Ask which of the three colour tokens sits furthest from the others in embedding space.\n",
"model.wv.doesnt_match('brown white black'.split())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment