Skip to content

Commit

Permalink
Crawled osu music packs
Browse files Browse the repository at this point in the history
  • Loading branch information
MewX committed Jun 18, 2018
1 parent 953e301 commit ba908fc
Show file tree
Hide file tree
Showing 3 changed files with 12 additions and 4 deletions.
4 changes: 3 additions & 1 deletion WebCrawlers/osu.ppy.sh/getlists.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,14 @@ def download(url):
print(e)
print('-- trying again: {}'.format(url))
continue
return b
return b.decode("utf-8")


"""
the main function
"""
list_page = download(FULL_LIST)
print(list_page)
propertyDb = PropertyDb()
for match in re.finditer(LIST_PATTERN, list_page, re.DOTALL | re.MULTILINE):
key = match.group(1)
Expand All @@ -53,4 +54,5 @@ def download(url):
# if not found
print('-- ERROR not found things on page on key {}: {}'.format(key, current_page))

propertyDb.close()
print('done')
10 changes: 8 additions & 2 deletions WebCrawlers/osu.ppy.sh/property_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,19 @@ def init_db(self):
self.conn.execute('CREATE TABLE IF NOT EXISTS properties (name TEXT PRIMARY KEY, value TEXT, time DATETIME DEFAULT current_timestamp);')


def close(self):
    """Close the database connection held in ``self.conn``."""
    self.conn.close()


def does_record_exist(self, key):
    """Return ``True`` when ``get_value(key)`` yields a row, else ``False``."""
    return self.get_value(key) is not None

def save_or_overwrite_data(self, key, value):
    """Store ``value`` under ``key`` in ``properties``, replacing any old row.

    The ``time`` column is set to ``current_timestamp`` on every write.

    :param key: value for the ``name`` column (the table's primary key).
    :param value: value for the ``value`` column.
    """
    # Using the connection as a context manager commits when the statement
    # succeeds and rolls back if it raises, so a failed write never leaves
    # a transaction open on the shared connection.
    with self.conn:
        self.conn.execute(
            "INSERT OR REPLACE INTO properties(name, value, time) VALUES (?, ?, current_timestamp);",
            (key, value),
        )


def get_value(self, key):
    """Return the row stored for ``key``, or ``None`` when there is none.

    The query runs on an explicit cursor: ``sqlite3.Connection`` offers an
    ``execute()`` shortcut but has no ``fetchone()``, so fetching must be
    done on the cursor that executed the statement.

    :param key: value matched against the ``name`` column of ``properties``.
    :return: the first matching row as produced by ``fetchone()`` (every
        column, since the query is ``SELECT *``), or ``None`` if no row
        matches.
    """
    cur = self.conn.cursor()
    cur.execute("SELECT * FROM properties WHERE name = ?;", (key,))
    return cur.fetchone()
2 changes: 1 addition & 1 deletion WebCrawlers/osu.ppy.sh/settings.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# NOTE: DO NOT COMMIT THIS FILE AFTER FILLING SECRET INFORMATION
DB_NAME = 'url.db'
COOKIES = '' # TODO: fill this
COOKIES = 'last_login=YOUR_COOKIES...; osu_site_v=old' # TODO: fill this using the old style cookie
USER_AGENT = 'Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.87 Safari/537.36'

0 comments on commit ba908fc

Please sign in to comment.