Skip to content
This repository has been archived by the owner on Mar 27, 2018. It is now read-only.

Commit

Permalink
Merge pull request #1 from owtf/0.11-Sypderjax
Browse files Browse the repository at this point in the history
Merge changes
  • Loading branch information
viyatb committed Jul 27, 2014
2 parents 3d70966 + a972b13 commit 44e3dc6
Show file tree
Hide file tree
Showing 18 changed files with 446 additions and 1,941 deletions.
25 changes: 20 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,11 @@ It also performs clicking, firing/triggering of events on certain candidate elem

This uses the HTML source to perform parsing.

* Compares DOM states before and after an event is triggered by the bot.
* Compares DOM trees before and after an event is triggered by the bot.

* Calculates the difference between two states
* Calculates the difference between two states (basically the **edit distance**)

* Parses the new DOM state for new links, changes

* (To be added later)
* Parses the new DOM tree for state changes


3. Controller
Expand All @@ -51,6 +49,8 @@ This manages the ***clickbot*** and the ***state-flow graph engine***.

It initializes, pauses, and stops the bot. This also creates the ***state flow graph*** based on **DOM state** changes (from the *DOM analyzer*).

This also controls the browser pool (multiple instances of the browser).


4. State-flow graph
---
Expand All @@ -74,6 +74,21 @@ Milestone

- [x] Minimal, functional webUI

Roadmap
---

* Beyond GSoC (0.11)

- [ ] Multiple instances of the browser, and managing the browser pool

- [ ] Multiprocessing/threading* based on future considerations

- [ ] Standalone package

- [ ] REST API

- [ ] Fully functional web interface


Contribute
---
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions configs/config.json → config/config.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
"phantomjs_path": "/usr/local/bin/phantomjs",
"chromedriver_path": "/home/viyat/workspace/spyderjax/lib/chromedriver",
"timeout": {
"brief": 5,
"short": 10
"w": 5,
"visit_condition_timeout": 10
}
}
13 changes: 6 additions & 7 deletions controller.py → config/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,12 @@
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* This module defines a controller which manages the start, pause and stop
process of the robot.
'''

import utils
import threading
import logging
def get_config(path='config.json'):
    """Load the primary JSON configuration into memory.

    Args:
        path: Location of the JSON config file. Defaults to ``config.json``,
            which ``open`` resolves against the current working directory.

    Returns:
        The decoded JSON document (a dict when the file holds a JSON object).

    Raises:
        IOError: if the file cannot be opened.
        ValueError: if the file does not contain valid JSON.
    """
    # Imported here so the function does not rely on a module-level import.
    import json

    with open(path) as data:
        # json.load() reads from a file object; json.loads() only accepts a
        # string and raised TypeError when handed the open file.
        return json.load(data)
Empty file added core/__init__.py
Empty file.
105 changes: 105 additions & 0 deletions core/controller.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
'''
owtf is an OWASP+PTES-focused try to unite great tools and facilitate pen testing
Copyright (c) 2011, Abraham Aranguren <[email protected]> Twitter: @7a_ http://7-a.org
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright owner nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
* This module defines a controller which manages the start, pause and stop
process of the robot.
'''

import simplejson as json
import subprocess
from glob import glob
from Queue import Queue
import threading

from main import Core
from embedded_browser import Browser


threads = [] # contains handles for each thread
q = Queue() # The main queue.
output = Queue() # The output queue - prevents output overlap


class OutThread(threading.Thread):
    """Worker thread that takes care of output.

    Every item taken from ``queue`` is passed to ``writelog`` together with
    the log file path, and the queue is then told the item is done.
    """

    def __init__(self, Core, queue, logfile):
        """Store the core object, the queue to drain and the log file path.

        Args:
            Core: Object exposing a ``RootDir`` attribute.
            queue: Queue-like object providing ``get()`` and ``task_done()``.
            logfile: Accepted for interface compatibility.
                NOTE(review): the value is not used; the path is always
                derived from ``Core.RootDir`` -- confirm this is intended.
        """
        # Imported here because `os` is not among the visible module imports.
        import os

        threading.Thread.__init__(self)
        self.queue = queue
        # self.core has to be assigned before RootDir is read from it; the
        # previous order raised AttributeError on every construction.
        self.core = Core
        self.logfile = os.path.join(self.core.RootDir, 'output/output.log')

    def run(self):
        """Forward queued items to ``writelog`` forever."""
        # NOTE(review): `writelog` is not defined or imported in the visible
        # part of this module -- confirm where it comes from.
        while True:
            writelog(self.queue.get(), self.logfile)
            self.queue.task_done()


class Control(object):
    """Keeps track of the browser instances."""

    def __init__(self, Core, desired_capabilities=None):
        """Remember the core object and start with an empty pool.

        ``desired_capabilities`` is accepted but not used by this constructor.
        """
        self.core, self.pool = Core, dict()

    def get_all(self):
        """Return the pool mapping itself (not a copy)."""
        everything = self.pool
        return everything


# NOTE(review): indentation is not visible for this block, so the nesting
# described in these comments (SubTest and wrapper defined inside run(), which
# then returns wrapper) is inferred from the trailing `return wrapper` --
# confirm. `unittest`, `multiprocessing`, `webdriver`, `thread_func` and `test`
# are not imported or defined in the visible part of this module.
def run():
"""Make test run in multiple browsers
"""

class SubTest(unittest.TestCase):
def __init__(self, driver=None):
self.driver = driver
# Called unconditionally, so the default driver=None would fail here.
self.driver.implicitly_wait(30)

def wrapper(*args, **kwargs):
# Local list of started processes; a `threads` name also exists at module level.
threads = []
# NOTE(review): `queue` and `i` are assigned but not referenced again in
# the visible code.
queue = multiprocessing.Queue(len(args[0].drivers._desired_capabilities) + 1)
i = 0

# Only when the drivers object has no `_drivers` attribute yet: create one
# remote driver per desired capability and register it.
if not hasattr(args[0].drivers, "_drivers"):
for c in args[0].drivers._desired_capabilities:
# Rebinds the wrapper's own **kwargs with the arguments for webdriver.Remote.
kwargs = {'desired_capabilities': c}

if args[0].drivers._command_executor != None:
kwargs['command_executor'] = args[0].drivers._command_executor

driver = webdriver.Remote(**kwargs)
args[0].drivers.register(driver)

# Start one process per entry in `_drivers`, each running
# thread_func(test, d). Presumably this loop sits outside the `if` above --
# confirm against the real indentation.
for d in args[0].drivers._drivers:
t = multiprocessing.Process(target=thread_func, args=(test, d))
t.start()
threads += [t]

# Wait for every started process to finish.
for t in threads:
t.join()


return wrapper
118 changes: 118 additions & 0 deletions core/embedded_browser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/python2
# -*- coding: utf-8 -*-
'''
owtf is an OWASP+PTES-focused try to unite great tools and facilitate pen testing
Copyright (c) 2011, Abraham Aranguren <[email protected]> Twitter: @7a_ http://7-a.org
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the copyright owner nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# This is a part of Google Summer of Code 2014 project, OWASP OWTF
'''
import sys
import time
import re
import simplejson as json
import copy
import functools
import mimetypes
from lxml import html

from main import Core

import splinter

from selenium.webdriver.support import wait
from utils.webdriver_patches import patch_webdriver
from utils.splinter_patches import patch_webdriverelement


class Browser(object):
    """Emulate splinter's Browser."""

    def __init__(self, Core, *args, **kwargs):
        """Keep a reference to the core object; no browser is started here."""
        self.core = Core

    def create(self):
        """Build a splinter browser for the driver named in the core config.

        Reads ``self.core.Config["driver"]`` and supports ``"firefox"``,
        ``"chrome"`` and ``"phantomjs"``. Every browser is configured to send
        its traffic through an HTTP proxy at 127.0.0.1:8008.

        Returns:
            The object returned by ``splinter.Browser``, or ``None`` when the
            configured driver is not one of the supported names.
        """
        driver_name = self.core.Config["driver"]

        if driver_name == "firefox":
            # FirefoxProfile is not among the visible module imports.
            from selenium.webdriver.firefox.firefox_profile import FirefoxProfile

            profile = FirefoxProfile()
            profile.set_preference("network.proxy.type", 1)
            profile.set_preference("network.proxy.http", "127.0.0.1")
            # The port preference is an integer in Firefox; it used to be
            # written as the string "8008".
            profile.set_preference("network.proxy.http_port", 8008)
            profile.set_preference("network.proxy.no_proxies_on", "")
            profile.set_preference('webdriver_enable_native_events', True)
            profile.update_preferences()
            # NOTE(review): confirm that the installed splinter Firefox driver
            # accepts a `firefox_profile` keyword.
            return splinter.Browser('firefox', firefox_profile=profile)

        if driver_name == "chrome":
            # ChromeOptions is not among the visible module imports.
            from selenium.webdriver.chrome.options import Options as ChromeOptions

            options = ChromeOptions()
            # The selenium method is add_argument (singular).
            options.add_argument("--proxy-server=http://127.0.0.1:8008/")
            # `options` has to be passed by keyword: a positional argument
            # after a keyword argument is a SyntaxError.
            # NOTE(review): confirm the keyword name against the installed
            # splinter version.
            return splinter.Browser(
                'chrome',
                executable_path=self.core.Config["chromedriver_path"],
                options=options
            )

        if driver_name == "phantomjs":
            service_args = (
                '--proxy=127.0.0.1:8008',
                '--proxy-type=http',
                '--ignore-ssl-errors=true'
            )
            # NOTE(review): the phantomjs path is passed positionally; verify
            # which parameter of the splinter PhantomJS driver it lands on.
            return splinter.Browser(
                'phantomjs',
                self.core.Config["phantomjs_path"],
                service_args=service_args
            )

        # Unsupported driver name: same result as before (implicit None).
        return None

    def wait_for_condition(self, condition=None, timeout=None, poll_frequency=0.5, ignored_exceptions=None):
        """Wait for given javascript condition.

        Args:
            condition: Callable invoked as ``condition(self)``; falls back to
                ``self.visit_condition`` when omitted or falsy.
            timeout: Passed to ``WebDriverWait``; falls back to
                ``self.visit_condition_timeout`` when omitted or falsy.
            poll_frequency: Passed through to ``WebDriverWait``.
            ignored_exceptions: Passed through to ``WebDriverWait``.

        Returns:
            Whatever ``WebDriverWait(...).until`` returns.
        """
        # NOTE(review): self.driver, self.visit_condition and
        # self.visit_condition_timeout are not assigned anywhere in the
        # visible part of this class -- confirm where they are set.
        condition = functools.partial(condition or self.visit_condition, self)

        timeout = timeout or self.visit_condition_timeout

        return wait.WebDriverWait(self.driver,
                                  timeout,
                                  poll_frequency=poll_frequency,
                                  ignored_exceptions=ignored_exceptions
                                  ).until(lambda browser: condition())

    # Later define it in the user profiles, or take from owtf general.cfg
    DEFAULT_ELEMENTS = ["a", "button", "li", "nav", "ol", "span", "ul", "header", "footer", "section"]

    def get_path(self):
        """
        Build the XPath union used to select clickable elements in the DOM.

        Elements whose class contains "selenium_donotclick" are excluded.
        The list of eligible elements will come from the config file.
        """
        clickable_element_types = tuple(
            '%s[not(contains(@class, "selenium_donotclick"))]' % i
            for i in ('a', 'submit', 'input[@type="submit"]')
        )
        # NOTE(review): the expression is built but not returned or used in
        # the visible code -- confirm whether the method continues.
        xpath = '|'.join('//%s' % item for item in clickable_element_types)
22 changes: 22 additions & 0 deletions core/spider.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
#!/usr/bin/env python2
# -*- coding: utf-8 -*-

from lxml import html

from utils import dom_utils
from main import Core


class Spider(object):
    """
    Main crawling engine.
    - Crawling is done through the robot (browser) module, and the
      resulting DOM tree is passed on for analysis.
    - The functions needed to build the state-flow graph come from
      the state module.
    """

    def __init__(self, Core, depth, base_url):
        """Keep the core object and read the crawl settings from its config.

        ``depth`` and ``base_url`` are accepted, but the stored values come
        from ``Core.Config["crawl_depth"]`` and ``Core.Config["target"]``.
        """
        self.core = Core
        settings = Core.Config
        self.base = settings["target"]
        self.depth = settings["crawl_depth"]
36 changes: 5 additions & 31 deletions state_machine.py → core/state_machine.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@
from lxml import html

import main
from embedded_browser import
import controller
from stategraph import StateFlowGraph
from utils import dom_utils
Expand All @@ -41,36 +40,11 @@ class StateMachine(object):
""" The state machine class. """

def __init__(self, Core, embedded_browser):
self.Core = Core
self.browser = embedded_browser
self.core = Core
self.browserpool = {}

def initialState(self):
def initial_state(self):
pass

def currentState(self):
return get_state_by_id(index[0])

def newState(self):
dom = self.browser.getDom()

return stateFlowGraph.new_state(self.browser.get_base_url(),
dom,
dom_utils.normalize(dom)
)

# ChangeS the currentState to the nextState if possible. The next state should already be
# present in the graph.
def changeState(self):
if not nextState:
return False

if StateFlowGraph.can_goto(currentState, nextState):
# next state becomes the current state
currentState() = nextState();
return True

else:
return False

# Adds the newState and the edge between the currentState and the newState on the SFG.
# SFG = stateFlowGraph
def current_state(self):
return StateFlowGraph.get_state_by_id(index[0])
File renamed without changes.
Loading

0 comments on commit 44e3dc6

Please sign in to comment.