# pystemon.yaml

archive:
  save: true            # Keep a copy of the pasties that match a search pattern
  save-all: true        # Keep a copy of all pasties
  dir: 'alerts'         # Directory where matching pasties should be kept
  dir-all: 'archive'    # Directory where all pasties should be kept (if save-all is enabled)
  compress: true        # Store the pasties compressed

db:
  sqlite3:              # Store information about the pastie in a database
    enable: false       # Activate this DB engine (NOT FULLY IMPLEMENTED)
    file: 'db.sqlite3'  # The filename of the database

email:
  alert: false          # Enable/disable email alerts
  from: 'alert@example.com'  # Sender address of the alert mail
  to: 'alert@example.com'    # Recipient address of the alert mail
  server: '127.0.0.1'   # Address of the server (hostname or IP)
  port: 25              # Outgoing SMTP port: 25, 587, ...
  username: ''          # (optional) Username for authentication. Leave blank for no authentication.
  password: ''          # (optional) Password for authentication. Leave blank for no authentication.
  # Subject line of the alert mail.
  # NOTE(review): {subject} looks like a placeholder filled in per alert - confirm in the consumer.
  subject: '[pystemon] - {subject}'

#####
# Definition of regular expressions to search for in the pasties
# 
search:
#  Template for one rule; fields marked (optional) may be left out.
#  - description: ''    # (optional) Human-readable description, used in alerting
#    search: ''         # Regular expression to look for
#    count: ''          # (optional) Number of hits needed before the pastie is interesting
#    exclude: ''        # (optional) Suppress the alert when this regular expression matches
#    regex-flags: ''    # (optional) Regular expression flags passed to the find function.
#                       #            Default = re.IGNORECASE
#                       #            Use 0 to have no flags set
#                       #            Warning: a value given here replaces the default
#                       #            More info: http://docs.python.org/2/library/re.html#re.DEBUG
#                       #  example: 're.MULTILINE + re.DOTALL + re.IGNORECASE'
  - search: '[^a-zA-Z0-9]example\.com'
  - description: 'Download (non-porn)'
    search: 'download'
    count: 4
    exclude: 'porn|sex|teen'

#####
# Configuration section for the paste sites
#
threads: 1              # number of download threads per site
site:
#  Template for one site entry:
#  example.com:
#    archive-url:       # URL of the page listing the most recent pasties
#                       # example: 'http://pastebin.com/archive'
#    archive-regex:     # regular expression that extracts the pastie-id from that page;
#                       # the capturing group () is what yields the pastie-id
#                       # example: '<a href="/(\w{8})">.+</a></td>'
#    download-url:      # URL of the raw pastie;
#                       # put {id} where the ID of the pastie has to be inserted
#                       # example: 'http://pastebin.com/raw.php?i={id}'
#    update-max: 40     # check for new pasties every X seconds, where X is
#    update-min: 30     # a random number chosen between these two values
#    pastie-classname:  # OPTIONAL: name of a custom Class that inherits from Pastie;
#                       # practical for sites that require custom fetchPastie() functions

  pastebin.com:
    archive-url: 'http://pastebin.com/archive'
    archive-regex: '<a href="/(\w{8})">.+</a></td>'
    download-url: 'http://pastebin.com/raw.php?i={id}'
    update-max: 40
    update-min: 30

  pastie.org:
    archive-url: 'http://pastie.org/pastes'
    archive-regex: '<a href="http://pastie.org/pastes/(\d{7})">'
    download-url: 'http://pastie.org/pastes/{id}/text'

  nopaste.me:
    archive-url: 'http://nopaste.me/recent'
    archive-regex: '<a href="http://nopaste.me/paste/([a-zA-Z0-9]+)">'
    download-url: 'http://nopaste.me/download/{id}.txt'

  slexy.org:
    archive-url: 'http://slexy.org/recent'
    archive-regex: '<a href="/view/([a-zA-Z0-9]+)">View paste</a>'
    download-url: 'http://slexy.org/raw/{id}'

  pastesite.com:
    pastie-classname: PastiePasteSiteCom
    archive-url: 'http://pastesite.com/recent'
    archive-regex: '<a href="(\d+)" title="View this Paste'
    download-url: 'http://pastesite.com/plain/{id}.txt'

#  Disabled:
#  gist.github.com:
#    archive-url: 'https://gist.github.com/gists'
#    archive-regex: '<span><a href="/([0-9]+)">gist'
#    download-url: 'https://raw.github.com/gist/{id}'

  codepad.org:
    archive-url: 'http://codepad.org/recent'
    archive-regex: '<a href="http://codepad.org/([a-zA-Z0-9]+)">view'
    download-url: 'http://codepad.org/{id}/raw.txt'

  cdv.lt:
    pastie-classname: PastieCdvLt
    archive-url: 'http://cdv.lt/snippets'
    archive-regex: '<a href="/([a-zA-Z0-9]+)">[0-9]'
    download-url: 'http://cdv.lt/api/snippet/{id}'

  snipt.net:
    pastie-classname: PastieSniptNet
    # Alternative (disabled) settings that read the HTML front page instead of the RSS feed:
    #archive-url: 'https://snipt.net/'
    #archive-regex: '<h1><a href="/([a-zA-Z0-9-_/]+)/">'
    archive-url: 'https://snipt.net/?rss'
    archive-regex: '<link>https://snipt.net/(.+)/</link>'
    download-url: 'https://snipt.net/{id}/'

#  safebin.net:  # FIXME not finished
#    archive-url: 'http://safebin.net/?archive'
#    archive-regex: '<a title="[a-zA-Z0-9 :,]+" href="/([0-9]+)">'
#    download-url: 'http://safebin.net/{id}'
#    update-max: 60
#    update-min: 50


# TODO
# http://www.safebin.net/       # more complex site
# http://www.heypasteit.com/    # http://www.heypasteit.com/clip/0IZA => incremental 

# http://hastebin.com/          # no list of last pastes
# http://sebsauvage.net/paste/  # no list of last pastes
# http://tny.cz/                # no list of last pastes
# https://pastee.org/           # no list of last pastes
# http://paste2.org/            # no list of last pastes
# http://0bin.net/              # no list of last pastes
# http://markable.in/           # no list of last pastes


#####
# Configuration section to configure proxies
# Currently only HTTP proxies are permitted
#
proxy:
  # NOTE(review): presumably enables picking a random proxy from 'file' - confirm in the consumer.
  random: false
  # File with the proxies to use (only HTTP proxies are permitted); exact format not shown here.
  file: 'proxies.txt'

#####
# Configuration section for User-Agents
#
user-agent:
  # NOTE(review): presumably enables picking a random User-Agent from 'file' - confirm in the consumer.
  random: false
  # File with the User-Agent strings to choose from; exact format not shown here.
  file: 'user-agents.txt'