diff --git a/Gemfile b/Gemfile index 8221dfa..216d157 100644 --- a/Gemfile +++ b/Gemfile @@ -4,6 +4,7 @@ gem 'feed-normalizer' gem 'twitter', '~> 5.3.0' gem 'twitter_oauth' gem 'json' +gem 'levenshtein' gem 'nokogiri' diff --git a/Gemfile.lock b/Gemfile.lock index cd018fc..f96ae9b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,6 +19,7 @@ GEM http_parser.rb http_parser.rb (0.6.0) json (1.8.1) + levenshtein (0.2.2) memoizable (0.4.0) thread_safe (~> 0.1.3) mime-types (2.1) @@ -64,6 +65,7 @@ PLATFORMS DEPENDENCIES feed-normalizer json + levenshtein rake rspec twitter (~> 5.3.0) diff --git a/lib/dayone.rb b/lib/dayone.rb index 3a2f8c3..0b25b05 100644 --- a/lib/dayone.rb +++ b/lib/dayone.rb @@ -1,4 +1,7 @@ require 'fileutils' +require 'digest/md5' +require 'levenshtein' +require 'pp' class DayOne < Slogger def to_dayone(options = {}) @@ -82,7 +85,6 @@ def process_image(image) unless ext =~ /\.jpg$/ case ext when '.jpeg' - @log.info("81") target = orig.gsub(/\.jpeg$/,'.jpg') FileUtils.mv(orig,target) return target @@ -126,4 +128,185 @@ def store_single_photo(file, options = {}, copy = false) return self.to_dayone(options) end end + + def levenshtein_distance(s, t) + m = s.size + n = t.size + d = Array.new(m+1) { Array.new(n+1) } + for i in 0..m + d[i][0] = i + end + for j in 0..n + d[0][j] = j + end + for j in 0...n + for i in 0...m + if s[i,1] == t[j,1] + d[i+1][j+1] = d[i][j] + else + d[i+1][j+1] = [d[i ][j+1] + 1, # deletion + d[i+1][j ] + 1, # insertion + d[i ][j ] + 1 # substitution + ].min + end + end + end + d[m][n] + end + + def dedup(similar=false) + files = Dir.glob(File.join(storage_path, 'entries', '*.doentry')) + to_keep = [] + to_delete = [] + similar_threshold = 30 + + if (similar) + dot_counter = 0 + files.each {|file| + next if to_keep.include?(file) || to_delete.include?(file) + photo_path = File.join(storage_path, 'photos') + photo = File.join(photo_path, File.basename(file,'.doentry')+'.jpg') + if File.exists?(photo) + to_keep.push(file) + next + end + + to_keep.push(file) + + data = Plist::parse_xml(file) + date = data['Creation Date'].strftime('%Y%m%d') + lines = data['Entry Text'].split("\n") + lines.delete_if {|line| line =~ /^\s*$/ } + text1 = lines.join('')[0..30] + + files.each {|file2| + next if to_keep.include?(file2) || to_delete.include?(file2) + photo = File.join(photo_path, File.basename(file,'.doentry')+'.jpg') + if File.exists?(photo) + to_keep.push(file) + next + end + + data2 = Plist::parse_xml(file2) + + if data2['Creation Date'].strftime('%Y%m%d') == date + lines2 = data2['Entry Text'].split("\n") + lines2.delete_if {|line| line =~ /^\s*$/ } + text2 = lines2.join('')[0..30] + + distance = Levenshtein.normalized_distance(text1, text2, threshold=nil) * 100 + if distance < similar_threshold + distance2 = Levenshtein.normalized_distance(lines.join('')[0..500], lines2.join('')[0..500]) + if distance2 > similar_threshold + printf "\r%02.4f: %s => %s\n" % [distance, File.basename(file), File.basename(file2)] + dot_counter = 0 + if lines2.join("\n").length > lines.join("\n").length + to_delete.push(file) + to_keep.delete(file) + else + to_delete.push(file2) + to_keep.delete(file2) + end + end + else + print "." + dot_counter += 1 + if dot_counter == 91 + print "\r" + dot_counter = 0 + end + to_keep.push(file2) + end + # if distance < similar_threshold + # puts "#{distance}: #{File.basename(file)} => #{File.basename(file2)}" + # if lines2.join("\n").length > lines.join("\n").length + # to_delete.push(file) + # to_keep.delete(file) + # else + # to_delete.push(file2) + # to_keep.delete(file2) + # end + end + } + } + exit + else + hashes = [] + files.each {|file| + data = Plist::parse_xml(file) + tags = data['Tags'].nil? ? '' : data['Tags'].join('') + hashes.push({ 'filename' => file, 'date' => data['Creation Date'], 'hash' => Digest::MD5.hexdigest(data['Entry Text']+tags+data['Starred'].to_s) }) + } + + hashes.sort_by!{|entry| entry['date']} + + existing = [] + to_delete = [] + hashes.each {|entry| + if existing.include?(entry['hash']) + to_delete.push(entry['filename']) + else + existing.push(entry['hash']) + end + } + to_delete.uniq! + end + + images = Dir.glob(File.join(storage_path, 'photos', '*.jpg')) + image_hashes = [] + + images_to_delete = [] + images.each {|image| + image_hashes.push({ 'filename' => image, 'hash' => Digest::MD5.file(image), 'date' => File.stat(image).ctime }) + } + + image_hashes.sort_by!{|image| image['date']} + + images_existing = [] + images_to_delete = [] + image_hashes.each {|image| + if images_existing.include?(image['hash']) + images_to_delete.push(image['filename']) + else + images_existing.push(image['hash']) + end + } + + # puts "Ready to move #{to_delete.length} files to the Trash?" + trash = File.expand_path('~/Desktop/DayOneDuplicates') + + FileUtils.mkdir_p(File.join(trash,"photos")) unless File.directory?(File.join(trash,"photos")) + FileUtils.mkdir_p(File.join(trash,"entries")) unless File.directory?(File.join(trash,"entries")) + + photo_path = File.join(storage_path, 'photos') + + to_delete.each {|file| + + photo = File.join(photo_path, File.basename(file,'.doentry')+'.jpg') + if File.exists?(photo) + images_to_delete.delete(photo) + FileUtils.mv(photo,File.join(trash,'photos')) + end + + FileUtils.mv(file,File.join(trash,'entries')) + } + + entry_path = File.join(storage_path, 'entries') + images_deleted = 0 + + images_to_delete.each {|file| + + entry = File.join(entry_path, File.basename(file,'.jpg')+'.doentry') + next if File.exists?(entry) + + if File.exists?(file) + FileUtils.mv(file,File.join(trash,"photos")) + images_deleted += 1 + end + } + + @log.info("Moved #{to_delete.length} entries/photos to #{trash}.") + @log.info("Found and moved #{images_deleted} images without entries.") + # %x{open -a Finder #{trash}} + end end diff --git a/lib/plist.rb b/lib/plist.rb new file mode 100644 index 0000000..7b9b705 --- /dev/null +++ b/lib/plist.rb @@ -0,0 +1,448 @@ +# = plist +# +# Copyright 2006-2010 Ben Bleything and Patrick May +# Distributed under the MIT License +# + +module Plist ; end + +# === Create a plist +# You can dump an object to a plist in one of two ways: +# +# * Plist::Emit.dump(obj) +# * obj.to_plist +# * This requires that you mixin the Plist::Emit module, which is already done for +Array+ and +Hash+. +# +# The following Ruby classes are converted into native plist types: +# Array, Bignum, Date, DateTime, Fixnum, Float, Hash, Integer, String, Symbol, Time, true, false +# * +Array+ and +Hash+ are both recursive; their elements will be converted into plist nodes inside the and containers (respectively). +# * +IO+ (and its descendants) and +StringIO+ objects are read from and their contents placed in a element. +# * User classes may implement +to_plist_node+ to dictate how they should be serialized; otherwise the object will be passed to Marshal.dump and the result placed in a element. +# +# For detailed usage instructions, refer to USAGE[link:files/docs/USAGE.html] and the methods documented below. +module Plist::Emit + # Helper method for injecting into classes. Calls Plist::Emit.dump with +self+. + def to_plist(envelope = true) + return Plist::Emit.dump(self, envelope) + end + + # Helper method for injecting into classes. Calls Plist::Emit.save_plist with +self+. + def save_plist(filename) + Plist::Emit.save_plist(self, filename) + end + + # The following Ruby classes are converted into native plist types: + # Array, Bignum, Date, DateTime, Fixnum, Float, Hash, Integer, String, Symbol, Time + # + # Write us (via RubyForge) if you think another class can be coerced safely into one of the expected plist classes. + # + # +IO+ and +StringIO+ objects are encoded and placed in elements; other objects are Marshal.dump'ed unless they implement +to_plist_node+. + # + # The +envelope+ parameters dictates whether or not the resultant plist fragment is wrapped in the normal XML/plist header and footer. Set it to false if you only want the fragment. + def self.dump(obj, envelope = true) + output = plist_node(obj) + + output = wrap(output) if envelope + + return output + end + + # Writes the serialized object's plist to the specified filename. + def self.save_plist(obj, filename) + File.open(filename, 'wb') do |f| + f.write(obj.to_plist) + end + end + + private + def self.plist_node(element) + output = '' + + if element.respond_to? :to_plist_node + output << element.to_plist_node + else + case element + when Array + if element.empty? + output << "\n" + else + output << tag('array') { + element.collect {|e| plist_node(e)} + } + end + when Hash + if element.empty? + output << "\n" + else + inner_tags = [] + + element.keys.sort.each do |k| + v = element[k] + inner_tags << tag('key', CGI::escapeHTML(k.to_s)) + inner_tags << plist_node(v) + end + + output << tag('dict') { + inner_tags + } + end + when true, false + output << "<#{element}/>\n" + when Time + output << tag('date', element.utc.strftime('%Y-%m-%dT%H:%M:%SZ')) + when Date # also catches DateTime + output << tag('date', element.strftime('%Y-%m-%dT%H:%M:%SZ')) + when String, Symbol, Fixnum, Bignum, Integer, Float + output << tag(element_type(element), CGI::escapeHTML(element.to_s)) + when IO, StringIO + element.rewind + contents = element.read + # note that apple plists are wrapped at a different length then + # what ruby's base64 wraps by default. + # I used #encode64 instead of #b64encode (which allows a length arg) + # because b64encode is b0rked and ignores the length arg. + data = "\n" + Base64::encode64(contents).gsub(/\s+/, '').scan(/.{1,68}/o) { data << $& << "\n" } + output << tag('data', data) + else + output << comment( 'The element below contains a Ruby object which has been serialized with Marshal.dump.' ) + data = "\n" + Base64::encode64(Marshal.dump(element)).gsub(/\s+/, '').scan(/.{1,68}/o) { data << $& << "\n" } + output << tag('data', data ) + end + end + + return output + end + + def self.comment(content) + return "\n" + end + + def self.tag(type, contents = '', &block) + out = nil + + if block_given? + out = IndentedString.new + out << "<#{type}>" + out.raise_indent + + out << block.call + + out.lower_indent + out << "" + else + out = "<#{type}>#{contents.to_s}\n" + end + + return out.to_s + end + + def self.wrap(contents) + output = '' + + output << '' + "\n" + output << '' + "\n" + output << '' + "\n" + + output << contents + + output << '' + "\n" + + return output + end + + def self.element_type(item) + case item + when String, Symbol + 'string' + + when Fixnum, Bignum, Integer + 'integer' + + when Float + 'real' + + else + raise "Don't know about this data type... something must be wrong!" + end + end + private + class IndentedString #:nodoc: + attr_accessor :indent_string + + def initialize(str = "\t") + @indent_string = str + @contents = '' + @indent_level = 0 + end + + def to_s + return @contents + end + + def raise_indent + @indent_level += 1 + end + + def lower_indent + @indent_level -= 1 if @indent_level > 0 + end + + def <<(val) + if val.is_a? Array + val.each do |f| + self << f + end + else + # if it's already indented, don't bother indenting further + unless val =~ /\A#{@indent_string}/ + indent = @indent_string * @indent_level + + @contents << val.gsub(/^/, indent) + else + @contents << val + end + + # it already has a newline, don't add another + @contents << "\n" unless val =~ /\n$/ + end + end + end +end + +# we need to add this so sorting hash keys works properly +class Symbol #:nodoc: + def <=> (other) + self.to_s <=> other.to_s + end +end + +class Array #:nodoc: + include Plist::Emit +end + +class Hash #:nodoc: + include Plist::Emit +end + +# === Load a plist file +# This is the main point of the library: +# +# r = Plist::parse_xml( filename_or_xml ) +module Plist +# Note that I don't use these two elements much: +# +# + Date elements are returned as DateTime objects. +# + Data elements are implemented as Tempfiles +# +# Plist::parse_xml will blow up if it encounters a data element. +# If you encounter such an error, or if you have a Date element which +# can't be parsed into a Time object, please send your plist file to +# plist@hexane.org so that I can implement the proper support. + def Plist::parse_xml( filename_or_xml ) + listener = Listener.new + #parser = REXML::Parsers::StreamParser.new(File.new(filename), listener) + parser = StreamParser.new(filename_or_xml, listener) + parser.parse + listener.result + end + + class Listener + #include REXML::StreamListener + + attr_accessor :result, :open + + def initialize + @result = nil + @open = Array.new + end + + + def tag_start(name, attributes) + @open.push PTag::mappings[name].new + end + + def text( contents ) + @open.last.text = contents if @open.last + end + + def tag_end(name) + last = @open.pop + if @open.empty? + @result = last.to_ruby + else + @open.last.children.push last + end + end + end + + class StreamParser + def initialize( plist_data_or_file, listener ) + if plist_data_or_file.respond_to? :read + @xml = plist_data_or_file.read + elsif File.exists? plist_data_or_file + @xml = File.read( plist_data_or_file ) + else + @xml = plist_data_or_file + end + + @listener = listener + end + + TEXT = /([^<]+)/ + XMLDECL_PATTERN = /<\?xml\s+(.*?)\?>*/um + DOCTYPE_PATTERN = /\s*)/um + COMMENT_START = /\A/um + + + def parse + plist_tags = PTag::mappings.keys.join('|') + start_tag = /<(#{plist_tags})([^>]*)>/i + end_tag = /<\/(#{plist_tags})[^>]*>/i + + require 'strscan' + + @scanner = StringScanner.new( @xml ) + until @scanner.eos? + if @scanner.scan(COMMENT_START) + @scanner.scan(COMMENT_END) + elsif @scanner.scan(XMLDECL_PATTERN) + elsif @scanner.scan(DOCTYPE_PATTERN) + elsif @scanner.scan(start_tag) + @listener.tag_start(@scanner[1], nil) + if (@scanner[2] =~ /\/$/) + @listener.tag_end(@scanner[1]) + end + elsif @scanner.scan(TEXT) + @listener.text(@scanner[1]) + elsif @scanner.scan(end_tag) + @listener.tag_end(@scanner[1]) + else + raise "Unimplemented element" + end + end + end + end + + class PTag + @@mappings = { } + def PTag::mappings + @@mappings + end + + def PTag::inherited( sub_class ) + key = sub_class.to_s.downcase + key.gsub!(/^plist::/, '' ) + key.gsub!(/^p/, '') unless key == "plist" + + @@mappings[key] = sub_class + end + + attr_accessor :text, :children + def initialize + @children = Array.new + end + + def to_ruby + raise "Unimplemented: " + self.class.to_s + "#to_ruby on #{self.inspect}" + end + end + + class PList < PTag + def to_ruby + children.first.to_ruby if children.first + end + end + + class PDict < PTag + def to_ruby + dict = Hash.new + key = nil + + children.each do |c| + if key.nil? + key = c.to_ruby + else + dict[key] = c.to_ruby + key = nil + end + end + + dict + end + end + + class PKey < PTag + def to_ruby + CGI::unescapeHTML(text || '') + end + end + + class PString < PTag + def to_ruby + CGI::unescapeHTML(text || '') + end + end + + class PArray < PTag + def to_ruby + children.collect do |c| + c.to_ruby + end + end + end + + class PInteger < PTag + def to_ruby + text.to_i + end + end + + class PTrue < PTag + def to_ruby + true + end + end + + class PFalse < PTag + def to_ruby + false + end + end + + class PReal < PTag + def to_ruby + text.to_f + end + end + + require 'date' + class PDate < PTag + def to_ruby + DateTime.parse(text) + end + end + + require 'base64' + class PData < PTag + def to_ruby + data = Base64.decode64(text.gsub(/\s+/, '')) + + begin + return Marshal.load(data) + rescue Exception => e + io = StringIO.new + io.write data + io.rewind + return io + end + end + end +end + + +module Plist + VERSION = '3.1.0' +end diff --git a/plugins/BlogLogger.rb b/plugins/BlogLogger.rb index 34fae40..6d07498 100644 --- a/plugins/BlogLogger.rb +++ b/plugins/BlogLogger.rb @@ -53,7 +53,7 @@ def do_log retries = 0 success = false until success - if parse_feed(rss_feed) + if parse_feed(rss_feed, retries) success = true else break if $options[:max_retries] == retries @@ -69,7 +69,7 @@ def do_log end end - def parse_feed(rss_feed) + def parse_feed(rss_feed, retries) markdownify = @blogconfig['markdownify_posts'] unless (markdownify.is_a? TrueClass or markdownify.is_a? FalseClass) markdownify = true @@ -79,18 +79,23 @@ def parse_feed(rss_feed) starred = true end tags = @blogconfig['blog_tags'] || '' - tags = "\n\n#{tags}\n" unless tags == '' + tags = "\n\n(#{tags})\n" unless tags == '' today = @timespan begin rss_content = "" open(rss_feed) do |f| - rss_content = f.read + begin + rss_content = f.read + rescue Exception => e + $stderr.puts "Reading content for #{item.title}" + $stderr.puts e + end end rss = RSS::Parser.parse(rss_content, false) - if @blogconfig['get_most_popular'] + if @blogconfig['get_most_popular'] && retries == 0 @log.info("Checking for most tweeted posts on #{rss.title.content}") posts = [] rss.items.each { |item| @@ -129,8 +134,8 @@ def parse_feed(rss_feed) sl.to_dayone(options) end end - rss.items.each { |item| + rss.items.each { |item| begin if item.class == RSS::Atom::Feed::Entry item_date = Time.parse(item.updated.to_s) + Time.now.gmt_offset @@ -150,7 +155,8 @@ def parse_feed(rss_feed) content = item.summary.content if content.nil? @log.error("No content field recognized in #{rss_feed}") if content.nil? rescue Exception => e - p e + $stderr.puts "Reading content for #{item.title}" + $stderr.puts e return false end else @@ -158,28 +164,43 @@ def parse_feed(rss_feed) @log.error("No content field recognized in #{rss_feed}") if content.nil? end - imageurl = false - image_match = content.match(/src="(https?:.*?\.(jpg|png|jpeg))(\?.*?)?"/i) rescue nil - imageurl = image_match[1] unless image_match.nil? + # if RUBY_VERSION.to_f > 1.9 + # content = content.force_encoding('utf-8') + # end + + begin + imageurl = false + image_match = content.match(/src="(https?:.*?\.(jpg|png|jpeg))(\?.*?)?"/i) rescue nil + imageurl = image_match[1] unless image_match.nil? + - # can't find a way to truncate partial html without nokogiri or other gems... - # content = content.truncate_html(10) unless @blogconfig['full_posts'] - content.gsub!(/(?:)+/,"\nhttp://vimeo.com/\\1\n\n") - content.gsub!(//,"\nhttp://www.youtube.com/watch?v=\\1\n\n") + # can't find a way to truncate partial html without nokogiri or other gems... + # content = content.truncate_html(10) unless @blogconfig['full_posts'] + content.gsub!(/(?:)+/,"\nhttp://vimeo.com/\\1\n\n") + content.gsub!(//,"\nhttp://www.youtube.com/watch?v=\\1\n\n") - content = content.markdownify if markdownify rescue content + content = content.markdownify if markdownify rescue content - # handle " _place_holder;" thing - content.gsub!(/ _place_holder;/," ") + # handle " _place_holder;" thing + content.gsub!(/ _place_holder;/," ") + rescue Exception => e + $stderr.puts "Gathering images for #{item.title}" + $stderr.puts e + end options = {} - if item.class == RSS::Atom::Feed::Entry - title = item.title.content.gsub(/\n+/,' ') - link = item.link.href - else - title = item.title.gsub(/\n+/,' ') - link = item.link + begin + if item.class == RSS::Atom::Feed::Entry + title = item.title.content.gsub(/\n+/,' ') + link = item.link.href + else + title = item.title.gsub(/\n+/,' ') + link = item.link + end + rescue Exception => e + $stderr.puts "Reading title for #{item.title}" + $stderr.puts e end options['content'] = "## [#{title.strip}](#{link.strip})\n\n#{content.strip}#{tags}" @@ -203,7 +224,8 @@ def parse_feed(rss_feed) end } rescue Exception => e - p e + $stderr.puts "Reading posts for #{rss_feed}" + $stderr.puts e return false end return true diff --git a/plugins/appnetlogger.rb b/plugins/appnetlogger.rb index e81fda6..4ec59b9 100644 --- a/plugins/appnetlogger.rb +++ b/plugins/appnetlogger.rb @@ -46,7 +46,7 @@ def do_log sl = DayOne.new config['appnet_tags'] ||= '' - tags = "\n\n#{config['appnet_tags']}\n" unless config['appnet_tags'] == '' + tags = "\n\n(#{config['appnet_tags']})\n" unless config['appnet_tags'] == '' today = @timespan.to_i @log.info("Getting App.net posts for #{config['appnet_usernames'].length} feeds") @@ -58,7 +58,7 @@ def do_log config['appnet_usernames'].each do |user| begin rss_feed = "https://alpha-api.app.net/feed/rss/users/@"+ user + "/posts" - + url = URI.parse rss_feed http = Net::HTTP.new url.host, url.port diff --git a/plugins/flickrlogger.rb b/plugins/flickrlogger.rb index 77f68e0..64379f7 100644 --- a/plugins/flickrlogger.rb +++ b/plugins/flickrlogger.rb @@ -62,7 +62,7 @@ def do_log sl = DayOne.new config['flickr_tags'] ||= '' - tags = config['flickr_tags'] == '' ? '' : "\n\n#{config['flickr_tags']}\n" + tags = config['flickr_tags'] == '' ? '' : "\n\n(#{config['flickr_tags']})\n" today = @timespan.to_i @log.info("Getting Flickr images for #{config['flickr_ids'].join(', ')}") diff --git a/plugins/foursquarelogger.rb b/plugins/foursquarelogger.rb index caa3b8b..3df6de0 100644 --- a/plugins/foursquarelogger.rb +++ b/plugins/foursquarelogger.rb @@ -36,7 +36,7 @@ def do_log @log.info("Getting Foursquare checkins") config['foursquare_tags'] ||= '' - @tags = "\n\n#{config['foursquare_tags']}\n" unless config['foursquare_tags'] == '' + @tags = "\n\n(#{config['foursquare_tags']})\n" unless config['foursquare_tags'] == '' @debug = config['debug'] || false entrytext = '' diff --git a/plugins/githublogger.rb b/plugins/githublogger.rb index 27ec25d..9e9d84a 100644 --- a/plugins/githublogger.rb +++ b/plugins/githublogger.rb @@ -80,7 +80,7 @@ def do_log } return false if output.strip == "" - entry = "## Github activity for #{Time.now.strftime(@date_format)}:\n\n#{output}\n#{config['github_tags']}" + entry = "## Github activity for #{Time.now.strftime(@date_format)}:\n\n#{output}\n(#{config['github_tags']})" DayOne.new.to_dayone({ 'content' => entry }) end diff --git a/plugins/goodreadslogger.rb b/plugins/goodreadslogger.rb index 8019578..a52cab6 100644 --- a/plugins/goodreadslogger.rb +++ b/plugins/goodreadslogger.rb @@ -77,7 +77,7 @@ def parse_feed(rss_feed) end tags = @grconfig['goodreads_tags'] || '' - tags = "\n\n#{tags}\n" unless tags == '' + tags = "\n\n(#{tags})\n" unless tags == '' begin rss_content = "" diff --git a/plugins/instapaperlogger.rb b/plugins/instapaperlogger.rb index 7dea63c..d3109b3 100644 --- a/plugins/instapaperlogger.rb +++ b/plugins/instapaperlogger.rb @@ -40,7 +40,7 @@ def do_log sl = DayOne.new config['instapaper_tags'] ||= '' - tags = "\n\n#{config['instapaper_tags']}\n" unless config['instapaper_tags'] == '' + tags = "\n\n(#{config['instapaper_tags']})\n" unless config['instapaper_tags'] == '' today = @timespan.to_i @log.info("Getting Instapaper posts for #{config['instapaper_feeds'].length} accounts") diff --git a/plugins/lastfmlogger.rb b/plugins/lastfmlogger.rb index f2af4f3..f6d5373 100644 --- a/plugins/lastfmlogger.rb +++ b/plugins/lastfmlogger.rb @@ -52,7 +52,7 @@ def do_log end config['lastfm_tags'] ||= '' - tags = "\n\n#{config['lastfm_tags']}\n" unless config['lastfm_tags'] == '' + tags = "\n\n(#{config['lastfm_tags']})\n" unless config['lastfm_tags'] == '' config['lastfm_feeds'] ||= ['recent', 'loved'] @@ -81,7 +81,7 @@ def do_log rss.items.each { |item| timestamp = Time.parse(item.pubDate.to_s) break if timestamp < today - ts = config['lastfm_include_timestamps'] ? "[#{timestamp.strftime(@time_format)}] " : "" + ts = config['lastfm_include_timestamps'] ? "#{timestamp.strftime(@time_format)} | " : "" title = ts + String(item.title).e_link() link = String(item.link).e_link() diff --git a/plugins/pinboardlogger.rb b/plugins/pinboardlogger.rb index ffcb4bc..7ce2b1a 100644 --- a/plugins/pinboardlogger.rb +++ b/plugins/pinboardlogger.rb @@ -43,7 +43,7 @@ def split_days(bookmarks) def digest_entry(bookmarks, tags) bookmarks.reverse.map do |t| t[:content] - end.join("\n") << "\n(#{tags})" + end.join("\n") << "\n#{tags.strip}" end def do_log @@ -60,7 +60,7 @@ def do_log sl = DayOne.new config['pinboard_tags'] ||= '' - tags = "\n\n#{config['pinboard_tags']}\n" unless config['pinboard_tags'] == '' + tags = "\n\n(#{config['pinboard_tags'].strip})\n" unless config['pinboard_tags'] == '' today = @timespan.to_i @log.info("Getting Pinboard bookmarks for #{config['pinboard_feeds'].length} feeds") @@ -100,7 +100,7 @@ def do_log unless output == '' || config['pinboard_digest'] options = {} options['datestamp'] = feed_output[0][:date].utc.iso8601 - options['content'] = "## New Pinboard bookmark\n#{output}(#{tags})" + options['content'] = "## New Pinboard bookmark\n#{output}#{tags.strip}" sl.to_dayone(options) end } @@ -108,7 +108,7 @@ def do_log rescue Exception => e puts "Error getting posts for #{rss_feed}" p e - return '' + return end end unless feed_link == '' || !config['pinboard_digest'] @@ -119,5 +119,6 @@ def do_log sl.to_dayone({'content' => content, 'datestamp' => Time.parse(k).utc.iso8601}) } end + return end end diff --git a/plugins/pocketlogger.rb b/plugins/pocketlogger.rb index c4ceba1..86ec901 100644 --- a/plugins/pocketlogger.rb +++ b/plugins/pocketlogger.rb @@ -42,7 +42,7 @@ def do_log config['pocket_tags'] ||= '' username = config['pocket_username'] password = config['pocket_passwd'] - tags = "\n\n#{config['pocket_tags']}\n" unless config['pocket_tags'] == '' + tags = "\n\n(#{config['pocket_tags']})\n" unless config['pocket_tags'] == '' today = @timespan @log.info("Getting Pocket posts for #{username}") diff --git a/plugins/rsslogger.rb b/plugins/rsslogger.rb index 447911e..9ae816b 100644 --- a/plugins/rsslogger.rb +++ b/plugins/rsslogger.rb @@ -60,7 +60,7 @@ def do_log def parse_feed(rss_feed) tags = @rssconfig['tags'] || '' - tags = "\n\n#{tags}\n" unless tags == '' + tags = "\n\n(#{tags})\n" unless tags == '' today = @timespan begin diff --git a/plugins_disabled/fitbit.rb b/plugins_disabled/fitbit.rb index 21bf144..79033cf 100644 --- a/plugins_disabled/fitbit.rb +++ b/plugins_disabled/fitbit.rb @@ -2,13 +2,13 @@ Plugin: Fitbit Description: Grabs todays fitbit stats. See fitbit.com Author: Patrice Brend'amour - + Notes: 1. To run this plugin you need to install the fitgem gem first: $ sudo gem install fitgem 2. Afterwards you can aquire a valid Fitbit Consumer token: http://dev.fitbit.com if you want to use your own. A default one is provided. 3. Upon first start, the plugin will ask you to open a URL and authorize the access to your data - + =end @@ -38,7 +38,7 @@ class FitbitLogger < Slogger def do_log if @config.key?(self.class.name) config = @config[self.class.name] - + # Check that the user has configured the plugin if !config.key?('fitbit_consumer_key') || config['fitbit_consumer_secret'] == "" @log.warn("Fitbit has not been configured, please create an application at http://dev.fitbit.com.") @@ -48,19 +48,19 @@ def do_log @log.warn("Fitbit has not been configured please edit your slogger_config file.") return end - + # ============================================================ # Init fitgem client - + oauth_token = config['fitbit_oauth_token'] oauth_secret = config['fitbit_oauth_secret'] fitbit_consumer_key = config['fitbit_consumer_key'] fitbit_consumer_secret = config['fitbit_consumer_secret'] - + client = Fitgem::Client.new(:consumer_key => fitbit_consumer_key, :consumer_secret => fitbit_consumer_secret, :unit_system => translateUnitSystem(config['fitbit_unit_system'])) developMode = $options[:develop] - - + + # ============================================================ # request oauth token if needed @log.info("#{oauth_token}") @@ -79,16 +79,16 @@ def do_log %x{open "http://www.fitbit.com/oauth/authorize?oauth_token=#{token}"} print "Paste the code you received here: " verifier = gets.strip - + begin access_token = client.authorize(token, secret, { :oauth_verifier => verifier }) - + if developMode @log.info("Verifier is: "+verifier) @log.info("Token is: "+access_token.token) @log.info("Secret is: "+access_token.secret) end - + config['fitbit_oauth_token'] = access_token.token; config['fitbit_oauth_secret'] = access_token.secret @log.info("Fitbit successfully configured, run Slogger again to continue") @@ -97,7 +97,7 @@ def do_log end return config end - + # ============================================================ # iterate over the days and create entries $i = 0 @@ -105,9 +105,9 @@ def do_log until $i >= days do currentDate = Time.now - ((60 * 60 * 24) * $i) timestring = currentDate.strftime('%F') - + @log.info("Logging Fitbit summary for #{timestring}") - + activities = client.activities_on_date(timestring) summary = activities['summary'] steps = summary['steps'] @@ -116,7 +116,7 @@ def do_log distanceUnit = client.label_for_measurement(:distance, false) activityPoints = summary['activeScore'] foodsEaten = "" - + if config['fitbit_log_body_measurements'] measurements = client.body_measurements_on_date(timestring) weight = measurements['body']['weight'] @@ -127,25 +127,25 @@ def do_log water = client.water_on_date(timestring) waterSummary = water['summary'] loggedWater = waterSummary['water'] - waterUnit = client.label_for_measurement(:liquids, false) - end + waterUnit = client.label_for_measurement(:liquids, false) + end if config['fitbit_log_sleep'] sleep = client.sleep_on_date(timestring) sleepSummary = sleep['summary'] - + hoursInBed = sleepSummary['totalTimeInBed'] / 60 minutesInBed = sleepSummary['totalTimeInBed'] - (hoursInBed * 60) timeInBed = "#{hoursInBed}h #{minutesInBed}min" - + hoursAsleep = sleepSummary['totalMinutesAsleep'] / 60 minutesAsleep = sleepSummary['totalMinutesAsleep'] - (hoursAsleep * 60) timeAsleep = "#{hoursAsleep}h #{minutesAsleep}min" end - + if config['fitbit_log_food'] foodData = client.foods_on_date(timestring) foods = foodData['foods'] - + mealList = Hash.new foodsEaten = "" totalCalories = 0 @@ -164,25 +164,25 @@ def do_log end end - + if developMode @log.info("Steps: #{steps}") @log.info("Distance: #{distance} #{distanceUnit}") @log.info("Floors: #{floors}") @log.info("ActivityPoints: #{activityPoints}") @log.info("Weight: #{weight} #{weightUnit}") - @log.info("BMI: #{bmi}") - @log.info("Water Intake: #{loggedWater} #{waterUnit}") + @log.info("BMI: #{bmi}") + @log.info("Water Intake: #{loggedWater} #{waterUnit}") @log.info("Time In Bed: #{timeInBed}") @log.info("Time Asleep: #{timeAsleep}") @log.info("Foods Eaten:\n #{foodsEaten}") end - + tags = config['fitbit_tags'] || '' - tags = "\n\n#{tags}\n" unless tags == '' - + tags = "\n\n(#{tags})\n" unless tags == '' + output = "**Steps:** #{steps}\n**Floors:** #{floors}\n**Distance:** #{distance} #{distanceUnit}\n**Activity Points:** #{activityPoints}\n" - + if config['fitbit_log_body_measurements'] output += "**Weight:** #{weight} #{weightUnit}\n**BMI:** #{bmi}\n" end @@ -196,7 +196,7 @@ def do_log if config['fitbit_log_food'] output += "**Foods eaten:** #{totalCalories} calories\n#{foodsEaten}" end - + # Create a journal entry options = {} options['content'] = "## Fitbit - Summary for #{currentDate.strftime(@date_format)}\n\n#{output}#{tags}" @@ -207,7 +207,7 @@ def do_log end return config end - + def translateMeal(mealId) case mealId when 1 @@ -233,7 +233,7 @@ def translateUnitSystem(unitSystemString) return Fitgem::ApiUnitSystem.METRIC when "UK" return Fitgem::ApiUnitSystem.UK - else + else return Fitgem::ApiUnitSystem.US end end @@ -256,10 +256,10 @@ def to_s end return mealString end - + def calories @calories end end - + diff --git a/plugins_disabled/gaugeslogger.rb b/plugins_disabled/gaugeslogger.rb index 1043d18..48601f0 100644 --- a/plugins_disabled/gaugeslogger.rb +++ b/plugins_disabled/gaugeslogger.rb @@ -61,7 +61,7 @@ def do_log date = @timespan + (60 * 60 * 24) json = gauges_api_call(key,"gauges") - return false unless json + return false unless json && json.has_key?('guages') gauges = [] while date.strftime("%Y%m%d") <= Time.now.strftime("%Y%m%d") @@ -111,7 +111,7 @@ def do_log output += "\n\n" return false if output.strip == "" - entry = "# Gaug.es report for #{gauge['title']} on #{gauge['date'].strftime(@date_format)}\n\n#{output}\n#{config['gauges_tags']}" + entry = "# Gaug.es report for #{gauge['title']} on #{gauge['date'].strftime(@date_format)}\n\n#{output}\n(#{config['gauges_tags']})" DayOne.new.to_dayone({ 'content' => entry, 'datestamp' => gauge['date'].utc.iso8601 }) } end diff --git a/plugins_disabled/googleanalyticslogger.rb b/plugins_disabled/googleanalyticslogger.rb index 2c444a7..9fe8ef8 100644 --- a/plugins_disabled/googleanalyticslogger.rb +++ b/plugins_disabled/googleanalyticslogger.rb @@ -132,7 +132,7 @@ def do_log config['access_token'] = new_tokens['access_token'] config['refresh_token'] = new_tokens['refresh_token'] - # + # # mutable_config['GoogleAnalyticsLogger']['access_token'] = new_tokens['access_token'] # mutable_config['GoogleAnalyticsLogger']['refresh_token'] = new_tokens['refresh_token'] end @@ -295,7 +295,7 @@ def do_log tags = config['tags'] || '' content.each do |key, body| logdate = "#{key[0..3]}-#{key[4..5]}-#{key[6..7]}" - body << "#{tags}" unless tags == '' + body << "(#{tags})" unless tags == '' # And Log to Day One options = {} diff --git a/plugins_disabled/omnifocus.rb b/plugins_disabled/omnifocus.rb index 7003d51..6d5320e 100644 --- a/plugins_disabled/omnifocus.rb +++ b/plugins_disabled/omnifocus.rb @@ -42,13 +42,13 @@ def do_log omnifocus_completed_tasks = config['omnifocus_completed_tasks'] || false log_notes = config['omnifocus_log_notes'] || false tags = config['tags'] || '' - tags = "\n\n#{@tags}\n" unless @tags == '' + tags = "\n\n(#{@tags})\n" unless @tags == '' + - output = '' developMode = $options[:develop] - - + + # Run an embedded applescript to get today's completed tasks if filters.empty? then @@ -62,15 +62,15 @@ def do_log if developMode @log.info("Running plugin for the last #{days} days") end - + until $i >= days do currentDate = Time.now - ((60 * 60 * 24) * $i) timestring = currentDate.strftime('%d/%m/%Y') - + if developMode @log.info("Running plugin for #{timestring}") end - + for filter in filters values = %x{osascript <<'APPLESCRIPT' set filter to "#{filter}" @@ -78,31 +78,31 @@ def do_log tell application id "com.omnigroup.OmniFocus" tell default document if filter is equal to "NONE" then - set refDoneToday to a reference to (flattened tasks where (completion date >= dteToday)) + set refDoneToday to a reference to (flattened tasks where (completion date ≥ dteToday)) else - set refDoneToday to a reference to (flattened tasks where (completion date >= dteToday) and name of containing project's folder = filter) - + set refDoneToday to a reference to (flattened tasks where (completion date ≥ dteToday) and name of containing project's folder = filter) + end if set {lstName, lstContext, lstProject, lstNote} to {name, name of its context, name of its containing project, note} of refDoneToday set strText to "" - + set numberOfItems to count of lstName repeat with iTask from 1 to numberOfItems set {strName, varContext, varProject, varNote} to {item iTask of lstName, item iTask of lstContext, item iTask of lstProject, item iTask of lstNote} - + set contextString to "null" set projectString to "null" set noteString to "null" if varContext is not missing value then set contextString to varContext if varProject is not missing value then set projectString to varProject if varNote is not missing value then set noteString to varNote - + set noteString to my replaceText(noteString, linefeed, "\\\\n") - + set delimiterString to "##__##" - + set strText to strText & strName & delimiterString & projectString & delimiterString & contextString & delimiterString & noteString & linefeed - + end repeat end tell end tell @@ -137,11 +137,11 @@ def do_log set AppleScript's text item delimiters to tempTID error errorMessage number errorNumber -- pass it on end try - + return someText end replaceText APPLESCRIPT} - + unless values.strip.empty? unless filter == "NONE" output += "\n## Tasks in #{filter}\n" @@ -151,17 +151,17 @@ def do_log # Create entries here tasks_completed += 1 #ensures that only valid characters are saved to output - + #this only works in newer ruby versions but not in the default 1.8.7 begin value = value.chars.select{|i| i.valid_encoding?}.join rescue end - + name, project, context, note = value.split("##__##") - + taskString = "## #{name}\n " - + if context != "null" taskString += "*Context:* #{context} \n" end @@ -170,9 +170,9 @@ def do_log end if note != "null" && log_notes note = note.gsub("\\n","\n> ") - taskString += "*Notes:*\n> #{note}\n" + taskString += "*Notes:*\n> #{note}" end - + output += taskString end output += "\n" diff --git a/plugins_disabled/soundcloudlogger.rb b/plugins_disabled/soundcloudlogger.rb index 76ed553..a940c50 100644 --- a/plugins_disabled/soundcloudlogger.rb +++ b/plugins_disabled/soundcloudlogger.rb @@ -65,7 +65,7 @@ def do_log def parse_feed(rss_feed) tags = @scconfig['soundcloud_tags'] || '' - tags = "\n\n#{tags}\n" unless tags == '' + tags = "\n\n(#{tags})\n" unless tags == '' starred = @scconfig['soundcloud_starred'] || false begin diff --git a/slogger.rb b/slogger.rb index ab7c31a..7043582 100755 --- a/slogger.rb +++ b/slogger.rb @@ -27,22 +27,46 @@ require SLOGGER_HOME + '/lib/sociallogger' require SLOGGER_HOME + '/lib/configtools' +require SLOGGER_HOME + '/lib/plist.rb' # require SLOGGER_HOME + '/lib/json' +if RUBY_VERSION.to_f > 1.9 + Encoding.default_external = Encoding::UTF_8 + Encoding.default_internal = Encoding::UTF_8 +end + class String def markdownify contents = '' - IO.popen('"$SLOGGER_HOME/lib/html2text"', "r+") do |io| - - Thread.new { self.each_line { |line| - io << line - }; io.close_write } + begin + if RUBY_VERSION.to_f > 1.9 + input = self.dup.force_encoding('utf-8') + else + input = self.dup + end - io.each_line do |line| - contents << line + IO.popen('"$SLOGGER_HOME/lib/html2text"', "r+") do |io| + begin + Thread.new { input.each_line { |line| + io << line + }; io.close_write } + rescue Exception => e + $stderr.puts e + end + begin + io.each_line do |line| + contents << line + end + rescue Exception => e + $stderr.puts e + end end + contents + rescue Exception => e + $stderr.puts e + $stderr.puts "Error in Markdownify" + self end - contents end # convert (multi)Markdown to HTML @@ -280,6 +304,11 @@ def template Creation Date <%= datestamp %> + Creator + + Software Agent + Slogger/#{MAJOR_VERSION}.#{MINOR_VERSION}.#{BUILD_NUMBER} + Entry Text <%= entry %> Starred @@ -353,6 +382,33 @@ def template $stdout.puts("Slogger version #{MAJOR_VERSION}.#{MINOR_VERSION}.#{BUILD_NUMBER}") exit end + opts.on( '--dedup', 'Remove duplicate entries from Journal') do + puts "This will remove entries from your Journal that have" + puts "duplicate content and matching tags. The oldest copy" + puts "of an entry will be preserved. The entries will be" + puts "moved to a DayOneDuplicates directory on your Desktop." + puts + answer = SloggerUtils.new.ask("Are you sure you want to continue?",["y","n"]) + if answer == "y" + DayOne.new.dedup + end + exit + end + ## This will be cool when it works. + # opts.on( '--dedup_similar', 'Remove similar entries from Journal') do + # puts "This will remove entries from your Journal that have" + # puts "very similar content on the same date. The oldest copy" + # puts "of an entry will be preserved. The entries will be" + # puts "moved to a DayOneDuplicates directory on your Desktop." + # puts + # puts "This is a slow process and can take >15m on large journals." + # puts + # answer = SloggerUtils.new.ask("Are you sure you want to continue?",["y","n"]) + # if answer == "y" + # DayOne.new.dedup(true) + # end + # exit + # end opts.on( '-h', '--help', 'Display this screen' ) do puts opts exit