diff --git a/richa/__pycache__/query_processing.cpython-38.pyc b/richa/__pycache__/query_processing.cpython-38.pyc
new file mode 100644
index 00000000..069705d1
Binary files /dev/null and b/richa/__pycache__/query_processing.cpython-38.pyc differ
diff --git a/richa/__pycache__/ranking.cpython-38.pyc b/richa/__pycache__/ranking.cpython-38.pyc
new file mode 100644
index 00000000..65f65614
Binary files /dev/null and b/richa/__pycache__/ranking.cpython-38.pyc differ
diff --git a/richa/app.py b/richa/app.py
new file mode 100644
index 00000000..4f1737de
--- /dev/null
+++ b/richa/app.py
@@ -0,0 +1,78 @@
+from flask import Flask, render_template, request
+import pymongo
+import os
+from flask_paginate import Pagination, get_page_args
+from ranking import Ranking
+from query_processing import QueryProcessing
+import time
+
+
+app = Flask(__name__)
+
+
@app.route('/')
def entry_point():
    """Render the landing (home) page of the search engine."""
    return render_template('home.html')
+
+
@app.route('/search_results')
def search_results():
    """Handle a search request.

    Reads the ``search`` query parameter, runs a MongoDB text search for
    each processed keyword, de-duplicates and ranks the hits, and renders
    a paginated results page.
    """
    connect_url = 'mongodb://127.0.0.1:27017/'
    client = pymongo.MongoClient(connect_url, connect=False)
    try:
        db = client.results

        search_string = request.args.get('search')

        processor = QueryProcessing(search_string)
        keywords = processor.processor()

        query = []

        start = time.time()
        for keyword in keywords:
            query.extend(db.search_results.find(
                {'$text': {'$search': keyword, '$caseSensitive': False}}))
        end = time.time()
        print(f"time to execute: {end-start}")

        # De-duplicate by title or url in O(n) with seen-sets instead of
        # the original O(n^2) nested scan.
        seen_titles = set()
        seen_urls = set()
        search_result = []
        for doc in query:
            if doc['title'] in seen_titles or doc['url'] in seen_urls:
                continue
            seen_titles.add(doc['title'])
            seen_urls.add(doc['url'])
            search_result.append(doc)

        rank = Ranking(search_result, search_string)
        ranked_result = rank.sorted_results()
    finally:
        # BUG FIX: always release the connection, even when the query,
        # processing, or ranking raises.
        client.close()

    page, per_page, offset = get_page_args(page_parameter='page',
                                           per_page_parameter='per_page')

    total = len(ranked_result)

    pagination = Pagination(page=page, per_page=per_page, total=total,
                            css_framework='bootstrap4')

    return render_template('search.html',
                           search_result=ranked_result[offset:offset + per_page],
                           page=page,
                           per_page=per_page,
                           pagination=pagination,
                           search_string=search_string
                           )
+
+
if __name__ == "__main__":
    # Development server only; deploy behind a real WSGI server in production.
    app.run(debug=True)
\ No newline at end of file
diff --git a/richa/crawler/__pycache__/popular_links.cpython-38.pyc b/richa/crawler/__pycache__/popular_links.cpython-38.pyc
new file mode 100644
index 00000000..cd61faf8
Binary files /dev/null and b/richa/crawler/__pycache__/popular_links.cpython-38.pyc differ
diff --git a/richa/crawler/crawler.py b/richa/crawler/crawler.py
new file mode 100644
index 00000000..da394b04
--- /dev/null
+++ b/richa/crawler/crawler.py
@@ -0,0 +1,118 @@
+from bs4 import BeautifulSoup
+import requests
+import pymongo
+import os
+import urllib.parse
+from popular_links import Popularity
+import sys
+
+
class Crawler():
    """Recursive web crawler that stores page metadata in MongoDB.

    Each crawled page is inserted into the ``results.search_results``
    collection as ``{url, title, description, score, popularity}``.
    """

    connect_url = 'mongodb://127.0.0.1:27017/'

    # Shared handles for this single-process script.
    client = pymongo.MongoClient(connect_url)
    db = client.results

    search_results = []

    url_count = 1  # running counter, used only for progress logging

    def start_crawl(self, url, depth):
        """Fetch robots.txt for *url*, collect disallowed links, then crawl.

        Falls back to crawling without a disallow list when robots.txt
        cannot be fetched or parsed.
        """
        robot_url = urllib.parse.urljoin(url, '/robots.txt')
        try:
            robots = requests.get(robot_url)
        except requests.RequestException:
            print("robots not found")
            # BUG FIX: the original fell through after this call and then
            # referenced the undefined `robots`, raising NameError.
            self.crawl(url, depth)
            return

        # NOTE(review): robots.txt is plain text, not HTML; relying on a <p>
        # tag only works because the lxml parser wraps the body — confirm
        # this is intended.
        soup = BeautifulSoup(robots.text, 'lxml')
        sauce = soup.find('p')
        if sauce is None:
            # BUG FIX: guard against a missing <p> instead of crashing.
            self.crawl(url, depth)
            return

        disallowed_links = []
        for word in sauce.text.split():
            if word[0] == '/':
                # Relative disallow path -> absolute URL.
                disallowed_links.append(urllib.parse.urljoin(url, word))
            elif 'http' in word:
                disallowed_links.append(word)
        print("got robots!!!")

        self.crawl(url, depth, disallowed_links)

    def crawl(self, url, depth, disallowed_links=()):
        """Fetch *url*, store its metadata, and recurse into its links.

        Parameters:
            url: absolute URL to fetch.
            depth: remaining recursion depth; 0 stops after storing this page.
            disallowed_links: URLs from robots.txt that must not be followed.
                BUG FIX: the original ``*disallowed_links`` star-arg made
                ``disallowed_links[0]`` raise an uncaught IndexError whenever
                crawl() was invoked without an explicit list.
        """
        try:
            print(f'Crawling url {self.url_count}: {url} at depth: {depth}')
            self.url_count += 1
            response = requests.get(url)
        except requests.RequestException:
            print(f'Failed to perform HTTP GET request on {url}')
            return

        soup = BeautifulSoup(response.text, 'lxml')

        try:
            title = soup.find('title').text  # AttributeError when no <title>
            # Concatenate all paragraph text as the page description.
            description = ''.join(
                tag.text.strip().replace('\n', '')
                for tag in soup.findAll('p'))
        except AttributeError:
            print("Failed to retrieve title and description\n")
            return

        popularity_score = Popularity(url).popularity_score()

        document = {
            'url': url,
            'title': title,
            'description': description,
            'score': 0,
            'popularity': popularity_score,
        }

        collection = self.db.search_results

        collection.insert_one(document)

        # Idempotent: create_index is a no-op once the index exists.
        collection.create_index([
            ('url', pymongo.TEXT),
            ('title', pymongo.TEXT),
            ('description', pymongo.TEXT),
            ('score', 1),
            ('popularity', 1)
        ], name='search_results', default_language='english')

        if depth == 0:
            return

        for link in soup.findAll('a'):
            try:
                href = link['href']
            except KeyError:
                print("no links to retrieve in the website entered!!!")
                continue
            if href in disallowed_links:
                continue
            if 'http' not in href:
                href = urllib.parse.urljoin(url, href)
            self.crawl(href, depth - 1, disallowed_links)

        # BUG FIX: the original called self.client.close() here, which ran at
        # the end of the *deepest* recursive frame while outer frames were
        # still crawling. The caller now owns the client's lifetime.
+
+
if __name__ == '__main__':
    # BUG FIX / idiom: guard the entry point so importing this module does
    # not start a crawl, and fail fast on missing CLI arguments.
    if len(sys.argv) < 3:
        sys.exit('usage: crawler.py <start_url> <depth>')
    spider = Crawler()
    spider.start_crawl(sys.argv[1], int(sys.argv[2]))
    spider.client.close()  # release the shared MongoDB client
\ No newline at end of file
diff --git a/richa/crawler/popular_links.py b/richa/crawler/popular_links.py
new file mode 100644
index 00000000..64f3eb63
--- /dev/null
+++ b/richa/crawler/popular_links.py
@@ -0,0 +1,18 @@
class Popularity():
    """Score a URL against a hand-picked list of popular domains."""

    # Each domain contributes an equal share of a 100-point budget.
    popular_domains = [
        'https://pypi.org/', 'https://www.indiatoday.in/',
    ]

    def __init__(self, url):
        # url: the URL being scored.
        self.url = url
        # BUG FIX: the accumulator was a class attribute; keep it as
        # per-instance state so it is clearly not shared between instances.
        self.ps = 0

    def popularity_score(self):
        """Return the popularity score for ``self.url``.

        A URL under a popular domain earns ``100 / len(popular_domains)``
        points; an exact domain match earns that share twice (both branches
        fire — preserved from the original logic, presumably intentional;
        TODO confirm).
        """
        share = 100 / len(self.popular_domains)
        for domain in self.popular_domains:
            if domain == self.url:
                self.ps += share
            if domain in self.url:
                self.ps += share

        return self.ps
\ No newline at end of file
diff --git a/richa/query_processing.py b/richa/query_processing.py
new file mode 100644
index 00000000..858b390c
--- /dev/null
+++ b/richa/query_processing.py
@@ -0,0 +1,37 @@
+import nltk
+from nltk.corpus import stopwords
+nltk.download('punkt')
+nltk.download('stopwords')
+from nltk.tokenize import word_tokenize
+from nltk.stem.porter import PorterStemmer
+from spellchecker import SpellChecker
+import string
+
+
+
class QueryProcessing():
    """Normalise a raw search string into a list of query keywords."""

    def __init__(self, search_string):
        # search_string: the raw user query.
        self.search_string = search_string

    def processor(self):
        """Lower-case, strip punctuation, drop English stopwords, stem and
        spell-correct the search string; return the resulting tokens.

        NOTE(review): spell-correction runs on already-stemmed tokens, which
        are often not dictionary words — confirm this ordering is intended.
        """
        self.search_string = self.search_string.lower()

        # Remove all punctuation in a single C-level pass.
        translator = str.maketrans('', '', string.punctuation)
        self.search_string = self.search_string.translate(translator)

        # Collapse runs of whitespace.
        self.search_string = " ".join(self.search_string.split())

        stop_words = set(stopwords.words("english"))
        word_tokens = word_tokenize(self.search_string)
        tokens = [word for word in word_tokens if word not in stop_words]

        stemmer = PorterStemmer()
        tokens = [stemmer.stem(word) for word in tokens]

        # BUG FIX: SpellChecker.correction() returns None for words it cannot
        # correct; fall back to the original token instead of inserting None.
        spell = SpellChecker()
        tokens = [spell.correction(token) or token for token in tokens]

        return tokens
\ No newline at end of file
diff --git a/richa/ranking.py b/richa/ranking.py
new file mode 100644
index 00000000..1590fc3e
--- /dev/null
+++ b/richa/ranking.py
@@ -0,0 +1,61 @@
+from operator import itemgetter
+import string
+
+
class Ranking:
    """Score and order crawler search results against a user query.

    Each result is a dict with at least ``url``, ``title``, ``description``,
    ``score`` and ``popularity`` keys; ``score`` is mutated in place.
    """

    def __init__(self, results, query):
        self.results = results
        self.query = query

    def search(self):
        """Parse the query into scoring keywords, applying the operators:

        * ``"phrase"``  -- exact phrase: the whole phrase is one keyword.
        * ``term:site`` -- keep only results whose URL contains ``site``.
        * ``term-word`` -- drop results that mention ``word``.

        May also narrow ``self.results`` as a side effect of the filters.
        """
        if '"' in self.query:
            # BUG FIX: the original built a translation table whose deletion
            # set removed spaces as well as quotes (collapsing the phrase into
            # one word), then crashed below on the never-assigned `key`
            # (UnboundLocalError). Strip only the quotes and return the
            # phrase as a single keyword.
            return [self.query.replace('"', '').strip()]

        filtered = []
        if ':' in self.query:  # filter by url search query => query:url
            key, fil = self.query.split(':', 1)
            for result in self.results:
                if fil.lower() in result['url'].lower():
                    filtered.append(result)
            self.results = filtered
        elif '-' in self.query:  # exclusion query => query-word
            key, fil = self.query.split('-', 1)
            # BUG FIX: the original used `or`, which only dropped results
            # mentioning the excluded word in BOTH fields; exclusion must
            # drop a result when the word appears in either field.
            for result in self.results:
                if (fil.lower() not in result['title'].lower()
                        and fil.lower() not in result['description'].lower()):
                    filtered.append(result)
            self.results = filtered
        else:
            key = self.query

        return key.split()

    def ranked_results(self):
        """Add keyword-match points to each result.

        +2 per keyword found in the title, +1 per keyword found in the
        description; returns the (mutated) result list.
        """
        keywords = self.search()
        for key in keywords:
            key_lower = key.lower()  # hoist out of the inner loop
            for result in self.results:
                if key_lower in result['title'].lower():
                    result['score'] += 2
                if key_lower in result['description'].lower():
                    result['score'] += 1

        return self.results

    def sorted_results(self):
        """Return results ordered by (popularity, score), highest first."""
        return sorted(self.ranked_results(),
                      key=itemgetter('popularity', 'score'), reverse=True)
\ No newline at end of file
diff --git a/richa/readme.md b/richa/readme.md
new file mode 100644
index 00000000..01755b0a
--- /dev/null
+++ b/richa/readme.md
@@ -0,0 +1,18 @@
+# Glugle
+### A search engine which shows results fetched from a few websites
+
+
+### Demo
+
+![Alt Text](static/glugle.gif)
+
+![Alt Text](static/results.png)
+
+### Tech Stack used:
+> Python
+> MongoDB
+> Flask
+
+### Future Aspects
+
+> It can be improved further by adding voice-based search and user log-in.
diff --git a/richa/static/css/style.css b/richa/static/css/style.css
new file mode 100644
index 00000000..63da678d
--- /dev/null
+++ b/richa/static/css/style.css
@@ -0,0 +1,116 @@
+body {
+
+ background-color: white;
+ color: rgb(48, 46, 46);
+ font-size: 25px;
+ }
+
+ .dark-mode {
+ background-color: rgb(48, 46, 46);
+ color: white;
+ }
+
+.nav-link active svg{
+ size: 20px;
+}
+:root {
+ --navHeight: 30px;
+ }
+
+
+
+ .switch {
+ width: 55px;
+ display: flex;
+ align-items: center;
+ justify-content: space-between;
+ }
+
+ .switch div {
+ position: relative;
+ display: block;
+ background: #eee;
+ width: 69px;
+ border-radius: 50px;
+ padding: 0 5px;
+ box-sizing: border-box;
+ cursor: pointer;
+ }
+
+ .fa-adjust {
+ transform: rotate(180deg);
+ }
+
+ .switch input {
+ display: none;
+ }
+
+ .slider {
+ background-color: rgb(166, 174, 221);
+ transition: 0.4s;
+ border-radius: 34px;
+ height: 15px;
+ width: 20px;
+ display: inline-block;
+ position: relative;
+ }
+
+ input:checked + .slider {
+ transform: translateX(16px);
+ }
+
+ nav {
+ background: #d2cca1;
+ box-shadow: 0 0 4px rgba(0, 0, 0, 0.3);
+ font-size: 1.1rem;
+ position: relative;
+ }
+
+ nav ul {
+ list-style-type: none;
+ }
+
+
+
+ nav ul li {
+ /* padding: 12px 10px; */
+ cursor: pointer;
+ transition: background 0.3s ease;
+ border-radius: 4px;
+ position: relative;
+ }
+
+
+
+
+
+
+ .take_query{
+ border-radius: 20px;
+ outline:none;
+ border: 1.5px solid rgb(31, 31, 32);
+ width: 500px;
+}
+.take_query:hover{
+ outline: blue;
+ border: 3px solid white;
+
+}
+
+.button_submit{
+ display: inline-block;
+ background-color: rgb(88, 91, 105);
+ border-radius: 20px;
+ border:none;
+ outline: none !important;
+ margin-top:20px; color: white;
+ padding: 8px;
+
+
+}
+
+.button_submit:hover{
+ transition: 0.3s all ease-in-out;
+ background-color:rgb(181, 182, 187);
+ color: black;
+}
\ No newline at end of file
diff --git a/richa/static/glugle.gif b/richa/static/glugle.gif
new file mode 100644
index 00000000..2fa850ea
Binary files /dev/null and b/richa/static/glugle.gif differ
diff --git a/richa/static/results.png b/richa/static/results.png
new file mode 100644
index 00000000..0aef0cbb
Binary files /dev/null and b/richa/static/results.png differ
diff --git a/richa/templates/base.html b/richa/templates/base.html
new file mode 100644
index 00000000..d0d1b66f
--- /dev/null
+++ b/richa/templates/base.html
@@ -0,0 +1,59 @@
+
+
+
Showing results for '{{search_string}}'
+{{ link.description[:300] }}...
+