Skip to content
This repository has been archived by the owner on Mar 27, 2018. It is now read-only.

Commit

Permalink
Resolved merge conflicts
Browse files Browse the repository at this point in the history
  • Loading branch information
viyatb committed Jul 15, 2014
2 parents 8328b38 + 94a9292 commit 3d70966
Show file tree
Hide file tree
Showing 20 changed files with 1,486 additions and 349 deletions.
45 changes: 2 additions & 43 deletions controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,48 +32,7 @@
'''

import utils

from robot import *
from graph import StateFlowGraph as sfg
from state import StateMachine
from spider import Spider

import threading
import logging

## Set up logger ######################
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# create a file handler
handler = logging.FileHandler('debug.log')
handler.setLevel(logging.INFO)

# create console handler with a higher log level
ch = logging.StreamHandler()
ch.setLevel(logging.ERROR)

# create a logging format
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
ch.setFormatter(formatter)

# add the handlers to the logger
logger.addHandler(handler)
logger.addHandler(ch)

#######################################


def start(Spider):
"""
Controls the start of the Spider instance
"""



def stop():


def pause():

def
from main import Config
79 changes: 31 additions & 48 deletions robot.py → embedded_browser.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python2
#!/usr/bin/python2
# -*- coding: utf-8 -*-
'''
owtf is an OWASP+PTES-focused try to unite great tools and facilitate pen testing
Expand Down Expand Up @@ -35,57 +35,49 @@
import simplejson as json
from lxml import html

from urllib2 import urlopen
import urllib2

from selenium.webdriver import *
from selenium.webdriver.common.keys import Keys
from selenium.common.exceptions import *
from selenium.webdriver.support.ui import WebDriverWait

from main import Config

## TODO: Possible optimization in form of passing DOM tree to lxml for analysing and parsing

## TODO: Possible optimization in form of passing DOM tree to lxml for analysing and parsing
## Right now, the initial implementation should only use Selenium to perform DOM tree traversal

#***************************** BUILD your browser here************************************
class WebDriverFactory(object):
"""
This takes care of building a browser based on config file
"""

def __init__(self, config):
with open('config.json') as config:
data = json.load(config)
self.config = data
def __init__(self, Config):
self.Config = Config

def create_webdriver(self, driver):
def create_webdriver(self):
"""
create a browser based on WebDriverWait
"""
# handle each case
if driver == "firefox":
if self.Core.CONFIG["driver"] == "firefox":
profile = FirefoxProfile()
profile.set_preference("network.proxy.type", 1)
profile.set_preference("network.proxy.http", "127.0.0.1")
profile.set_preference("network.proxy.http_port", "8008")
#use proxy for everything, including localhost
profile.set_preference("network.proxy.no_proxies_on", "");
profile.update_preferences()
browser = Firefox(firefox_profile=profile)

return browser

elif driver == "chrome":
elif self.Core.CONFIG["driver"] == "chrome":
options = ChromeOptions()
# set proxy options
options.add_arguments("--proxy-server=http://127.0.0.1:8008/")
browser = Chrome(executable_path=self.config["chromedriver_path"], options)

return browser

elif driver == "phantomjs":
#proxy configuration
elif self.Core.CONFIG["driver"] == "phantomjs":
service_args = (
'--proxy=127.0.0.1:8008',\
'--proxy-type=http',\
Expand All @@ -98,21 +90,18 @@ def create_webdriver(self, driver):

class WebDriverManager(object):

# Config setting to use new webdriver instance per thread.
ENABLE_THREADING_SUPPORT = "browser["threaded"]"

# Config setting to reuse browser instances between WebdriverManager.new_driver() calls.
INSTANCES = "browser["instances"]"


def __init__(self, config, webdriver_factory):
with open('config.json') as config:
data = json.load(config)
self.config = data
def __init__(self, Core, webdriver_factory):
self.Core = Core
self.__webdriver = {} # Object with channel as a key
self.__registered_drivers = {}
self._webdriver_factory = WebDriverFactory()

# Config setting to use new webdriver instance per thread.
ENABLE_THREADING_SUPPORT = self.Core.CONFIG["browser.threaded"]

# Config setting to reuse browser instances between WebdriverManager.new_driver() calls.
INSTANCES = self.Core.CONFIG["browser.instances"]

def get_driver(self):
"""
Get an already running instance of Webdriver. If there is none, it will create one.
Expand Down Expand Up @@ -154,20 +143,15 @@ def new_driver(self):
driver = self.__get_driver_for_channel(channel)

# if self.__config.get(WebDriverManager.REUSE_BROWSER, True):
if driver is None:
driver = self._webdriver_factory.create_webdriver(# global browser setting)
if driver is None:
driver = self._webdriver_factory.create_webdriver

# Register webdriver so it can be retrieved by the manager and
# cleaned up after exit.
self.__register_driver(channel, driver)
else:
try:
driver.quit()
except:
pass
# Register webdriver so it can be retrieved by the manager and
# cleaned up after exit.
self.__register_driver(channel, driver)

driver = self._webdriver_factory.create_webdriver(# global browser name)
self.__register_driver(channel, driver)
driver = self._webdriver_factory.create_webdriver
self.__register_driver(channel, driver)

else:
# Attempt to tear down any existing webdriver.
Expand All @@ -177,7 +161,7 @@ def new_driver(self):
except:
pass
self.__unregister_driver(channel)
driver = self._webdriver_factory.create_webdriver(# global browser)
driver = self._webdriver_factory.create_webdriver
self.__register_driver(channel, driver)

return driver
Expand Down Expand Up @@ -235,7 +219,8 @@ class WebDriverAPI(object):
Provides a necessary higher-abstraction wrapper around selenium WebDriver
"""

def __init__(self, browser):
def __init__(self, Config, browser):
self.CONFIG = Config
self.browser = WebDriverManager.get_driver()

@staticmethod
Expand Down Expand Up @@ -296,10 +281,9 @@ def goToURL(self, url):
#navigate().to() and get() are synonyms :)
self.browser.get(url)
handlePopUps()
except WebDriverException, e:
except WebDriverException:
pass
except InterruptedException, e:
print "goToUrl got interrupted while waiting for the page to be loaded ", e
except InterruptedException:
pass

def handlePopUps(self):
Expand All @@ -309,8 +293,7 @@ def handlePopUps(self):
+ "window.confirm = function(msg){return true;};" \
+ "window.prompt = function(msg){return true;};" \
)
except UnexpectedAlertPresentException, e:
print "Unexpected Alert element: ", e
except UnexpectedAlertPresentException:
pass

def goback(self):
Expand Down Expand Up @@ -347,7 +330,7 @@ def screenshot(self, filename):
except Exception, e:
print "Error: ", e

def dom(self):
def getDOM(self):
return self.browser.page_source

# Later define it in the user profiles, or take from owtf general.cfg
Expand Down
127 changes: 0 additions & 127 deletions graph.py

This file was deleted.

1 change: 1 addition & 0 deletions lib/readability/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .readability import Document
31 changes: 31 additions & 0 deletions lib/readability/cleaners.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# strip out a set of nuisance html attributes
import re

from lxml.html.clean import Cleaner

# Regex fragments naming the attributes to strip: sizing, inline style,
# colour/background variants and inline event handlers.
# NOTE: the handler entry is 'on[a-z]+' (onclick, onload, ...).  The previous
# 'on*' was a glob spelled as a regex and only matched "o", "on", "onn", ...
# so event-handler attributes were never removed.
bad_attrs = ['width', 'height', 'style', '[-a-z]*color', 'background[-a-z]*', 'on[a-z]+']

# The three spellings an attribute value can take.
single_quoted = "'[^']+'"
double_quoted = '"[^"]+"'
non_space = '[^ "\'>]+'

# One unwanted attribute name, and one attribute value in any spelling.
_bad_attr_name = '(?:%s)' % '|'.join(bad_attrs)
_attr_value = '(?:%s|%s|%s)' % (non_space, single_quoted, double_quoted)

# Matches a single tag containing one unwanted attribute.  Group 1 is the
# text before the attribute (tag name and earlier attributes), group 2 is
# the text after it, so substituting '<\1\2>' drops just that attribute.
# Only one attribute is removed per tag per pass; callers must repeat the
# substitution until nothing matches.
htmlstrip = re.compile(
    '<'                 # open
    '([^>]+) '          # prefix
    + _bad_attr_name    # undesirable attribute name
    + ' *= *'
    + _attr_value       # value
    + '([^>]*)'         # postfix
    '>',                # end
    re.I)

def clean_attributes(html):
    """Return *html* with every attribute matched by ``htmlstrip`` removed.

    Each pass of the pattern removes at most one attribute per tag, so the
    substitution is repeated until a pass makes no replacement.
    """
    while True:
        html, replaced = htmlstrip.subn('<\\1\\2>', html)
        if not replaced:
            return html

def normalize_spaces(s):
    """Replace any sequence of whitespace characters with a single space.

    Leading and trailing whitespace is removed as a side effect of the
    split/join.  Any falsy input (``None``, ``''``) yields ``''``.
    """
    # The docstring must come first: placed after this guard it was a dead
    # expression statement and the function had no __doc__.
    if not s:
        return ''
    return ' '.join(s.split())

# Shared lxml Cleaner instance.  Options are listed one per line and grouped
# by value so the enabled and disabled settings can be read at a glance.
html_cleaner = Cleaner(
    # switched on
    scripts=True,
    javascript=True,
    comments=True,
    style=True,
    links=True,
    processing_instructions=True,
    # switched off
    meta=False,
    add_nofollow=False,
    page_structure=False,
    embedded=False,
    frames=False,
    forms=False,
    annoying_tags=False,
    remove_unknown_tags=False,
    safe_attrs_only=False,
    # no extra tags to unwrap
    remove_tags=None,
)
Loading

0 comments on commit 3d70966

Please sign in to comment.