Skip to content

Commit

Permalink
Added Row filtering and Column splitting logic.
Browse files Browse the repository at this point in the history
Now it's possible to create additional columns that were not present in the original data file. It is also possible to reject rows that don't meet certain criteria.
  • Loading branch information
mvadu committed May 1, 2016
1 parent a83cd59 commit 75f0e88
Show file tree
Hide file tree
Showing 20 changed files with 970 additions and 425 deletions.
34 changes: 32 additions & 2 deletions Influxer/Config/ColumnConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@ public enum ColumnDataType : int
Timestamp,
Tag,
NumericalField,
StringField
StringField,
BooleanField
}

public class ColumnConfig : ConfigurationElement, IConfigurationElementCollectionElement
Expand Down Expand Up @@ -62,12 +63,41 @@ public ExtractTransformationCollection ExtractTransformations
set { this["ExtractTransformations"] = value; }
}

/// <summary>
/// Gets or sets the collection stored under the "FilterTransformations" configuration property.
/// </summary>
[ConfigurationProperty ("FilterTransformations")]
public FilterTransformationCollection FilterTransformations
{
    get
    {
        object stored = this["FilterTransformations"];
        return (FilterTransformationCollection) stored;
    }
    set
    {
        this["FilterTransformations"] = value;
    }
}

/// <summary>
/// Gets or sets the <see cref="Splitter"/> stored under the "Split" configuration property.
/// </summary>
[ConfigurationProperty("Split")]
public Splitter SplitConfig
{
    get
    {
        object stored = this["Split"];
        return (Splitter)stored;
    }
    set
    {
        this["Split"] = value;
    }
}

/// <summary>
/// Gets or sets the flag stored under the "IsDefault" configuration property.
/// </summary>
[ConfigurationProperty ("IsDefault")]
public bool IsDefault
{
    get
    {
        object stored = this["IsDefault"];
        return (bool) stored;
    }
    set
    {
        this["IsDefault"] = value;
    }
}

/// <summary>
/// Returns the key for this element, which is its <c>InfluxName</c>.
/// </summary>
public string GetKey() => InfluxName;

/// <summary>
/// Runs the base post-deserialization step, then rejects a column that declares
/// split sub columns together with extract or replace transformations.
/// </summary>
/// <exception cref="ArgumentException">
/// The column has split sub columns and also at least one extract or replace transformation.
/// </exception>
protected override void PostDeserialize()
{
    base.PostDeserialize();

    bool isSplit = SplitConfig.SubColumnsConfig.Count > 0;
    if (!isSplit)
    {
        return;
    }

    bool isTransformed = ExtractTransformations?.Count > 0
        || ReplaceTransformations?.Count > 0;
    if (isTransformed)
    {
        throw new ArgumentException("A Column can be split or transformed, but not both!!");
    }
}


}
}
16 changes: 16 additions & 0 deletions Influxer/Config/ExtractTransformation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,20 @@ public string RegEx
set { this["RegEx"] = value; }
}

/// <summary>
/// Gets or sets the flag stored under the "IsDefault" configuration property.
/// </summary>
[ConfigurationProperty("IsDefault")]
public bool IsDefault
{
    get
    {
        object stored = this["IsDefault"];
        return (bool)stored;
    }
    set
    {
        this["IsDefault"] = value;
    }
}

/// <summary>
/// Gets or sets the text stored under the "DefaultValue" configuration property.
/// </summary>
[ConfigurationProperty("DefaultValue")]
public string DefaultValue
{
    get
    {
        object stored = this["DefaultValue"];
        return (string)stored;
    }
    set
    {
        this["DefaultValue"] = value;
    }
}

Regex _extractPattern;
public Regex ExtractPattern
{
Expand All @@ -64,6 +78,7 @@ public Regex ExtractPattern

public bool CanTransform(string content)
{
if (IsDefault) return true;
if (Type == ExtractType.SubString)
return !String.IsNullOrWhiteSpace(content) ? content.Length > StartIndex && content.Length > (StartIndex + Length) : false;
else
Expand All @@ -73,6 +88,7 @@ public bool CanTransform(string content)

public string Transform(string content)
{
if (IsDefault) return DefaultValue;
if (Type == ExtractType.SubString)
return content.Substring(StartIndex, Length);
else
Expand Down
73 changes: 73 additions & 0 deletions Influxer/Config/FilterTransformation.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
using System;
using System.Collections.Generic;
using System.Configuration;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace AdysTech.Influxer.Config
{
/// <summary>
/// Configuration element describing a row filter. <see cref="CanTransform"/> reports whether
/// a value matches the rule, and <see cref="Transform"/> throws for matching values so the
/// caller can reject the row.
/// </summary>
public class FilterTransformation : ConfigurationElement, ITransform, IConfigurationElementCollectionElement
{
    /// <summary>
    /// Regular expression text used to recognise values that should be filtered out.
    /// </summary>
    [ConfigurationProperty ("RegEx")]
    public string RegEx
    {
        get { return (string) this["RegEx"]; }
        set
        {
            this["RegEx"] = value;
            // Drop the cached Regex so the next match uses the new expression.
            _extractPattern = null;
        }
    }

    /// <summary>
    /// When true, <see cref="CanTransform"/> reports a match for every value.
    /// </summary>
    [ConfigurationProperty ("IsDefault")]
    public bool IsDefault
    {
        get { return (bool) this["IsDefault"]; }
        set { this["IsDefault"] = value; }
    }

    /// <summary>
    /// Value stored under the "DefaultValue" configuration property. This class does not
    /// read it; it is declared here alongside the other members of this element.
    /// </summary>
    [ConfigurationProperty ("DefaultValue")]
    public string DefaultValue
    {
        get { return (string) this["DefaultValue"]; }
        set { this["DefaultValue"] = value; }
    }

    Regex _extractPattern;

    /// <summary>
    /// Lazily built, case-insensitive compiled <see cref="Regex"/> for <see cref="RegEx"/>.
    /// Returns null while <see cref="RegEx"/> is null, empty or whitespace.
    /// </summary>
    public Regex ExtractPattern
    {
        get
        {
            if (_extractPattern == null && !String.IsNullOrWhiteSpace (RegEx))
            {
                _extractPattern = new Regex (RegEx, RegexOptions.Compiled | RegexOptions.IgnoreCase);
            }
            return _extractPattern;
        }
    }

    /// <summary>
    /// Tells whether <paramref name="content"/> is matched by this filter.
    /// </summary>
    /// <param name="content">Value to test.</param>
    /// <returns>
    /// True when <see cref="IsDefault"/> is set, or when the content is non-blank and matches
    /// <see cref="ExtractPattern"/>. False for blank content or when no expression is configured.
    /// </returns>
    public bool CanTransform (string content)
    {
        if (IsDefault)
        {
            return true;
        }

        if (String.IsNullOrWhiteSpace (content))
        {
            return false;
        }

        // ExtractPattern is null when RegEx is blank; treat that as "matches nothing"
        // instead of failing with a NullReferenceException.
        Regex pattern = ExtractPattern;
        return pattern != null && pattern.IsMatch (content);
    }

    /// <summary>
    /// Rejects content matched by this filter.
    /// </summary>
    /// <param name="content">Value to test.</param>
    /// <returns>An empty string when the content is not matched by the filter.</returns>
    /// <exception cref="InvalidDataException">The content is matched by the filter.</exception>
    public string Transform (string content)
    {
        if (CanTransform (content))
        {
            throw new InvalidDataException (String.Format ("{0} filtered out as per rule {1}", content, RegEx));
        }
        return string.Empty;
    }

    /// <summary>
    /// Returns the key for this element, derived from its hash code.
    /// </summary>
    public string GetKey ()
    {
        return this.GetHashCode ().ToString ();
    }
}
}
8 changes: 8 additions & 0 deletions Influxer/Config/GenericFileConfig.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,13 @@ public ColumnLayoutConfig ColumnLayout
get { return (ColumnLayoutConfig)this["ColumnLayout"]; }
set { this["ColumnLayout"] = value; }
}

/// <summary>
/// Gets or sets the flag stored under the "IgnoreErrors" configuration property,
/// also exposed as the "-ignoreerrors" command line argument.
/// </summary>
[CommandLineArgAttribute ("-ignoreerrors", Usage = "-ignoreerrors true", Description = "Ignore too many errors due to invalid data or config file")]
[ConfigurationProperty ("IgnoreErrors")]
public bool IgnoreErrors
{
    get
    {
        object stored = this["IgnoreErrors"];
        return (bool) stored;
    }
    set
    {
        this["IgnoreErrors"] = value;
    }
}
}
}
17 changes: 17 additions & 0 deletions Influxer/Config/ISplit.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;

namespace AdysTech.Influxer.Config
{
/// <summary>
/// Contract for breaking one column value into values for several sub columns.
/// </summary>
public interface ISplit
{
/// <summary>Reports whether <paramref name="content"/> can be split by this instance.</summary>
bool CanSplit(string content);
/// <summary>Splits <paramref name="content"/> and returns each piece keyed by the column it belongs to.</summary>
Dictionary<ColumnConfig, string> Split(string content);
/// <summary>Columns that a split can produce.</summary>
IList<ColumnConfig> SubColumns { get; }
}


}
4 changes: 4 additions & 0 deletions Influxer/Config/ITransform.cs
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,9 @@ public interface ITransform
{
bool CanTransform(string content);
string Transform(string content);
bool IsDefault { get; set; }
string DefaultValue { get; set; }
}


}
19 changes: 18 additions & 1 deletion Influxer/Config/ReplaceTransformation.cs
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,25 @@ public string ReplaceWith
set { this["ReplaceWith"] = value; }
}

/// <summary>
/// Gets or sets the flag stored under the "IsDefault" configuration property.
/// </summary>
[ConfigurationProperty("IsDefault")]
public bool IsDefault
{
    get
    {
        object stored = this["IsDefault"];
        return (bool)stored;
    }
    set
    {
        this["IsDefault"] = value;
    }
}

/// <summary>
/// Gets or sets the text stored under the "DefaultValue" configuration property.
/// </summary>
[ConfigurationProperty("DefaultValue")]
public string DefaultValue
{
    get
    {
        object stored = this["DefaultValue"];
        return (string)stored;
    }
    set
    {
        this["DefaultValue"] = value;
    }
}



/// <summary>
/// Tells whether this replacement applies to <paramref name="content"/>.
/// </summary>
/// <param name="content">Value to test.</param>
/// <returns>
/// True when <c>IsDefault</c> is set, or when the content is non-blank and contains
/// <c>FindText</c>; otherwise false.
/// </returns>
public bool CanTransform(string content)
{
    if (IsDefault)
    {
        return true;
    }

    if (String.IsNullOrWhiteSpace(content))
    {
        return false;
    }

    return content.Contains(FindText);
}

Expand All @@ -37,6 +53,7 @@ public string GetKey()

/// <summary>
/// Produces the replacement result for <paramref name="content"/>.
/// </summary>
/// <param name="content">Value to transform.</param>
/// <returns>
/// <c>DefaultValue</c> when <c>IsDefault</c> is set; otherwise the content with every
/// occurrence of <c>FindText</c> replaced by <c>ReplaceWith</c>.
/// </returns>
public string Transform(string content)
{
    return IsDefault
        ? DefaultValue
        : content.Replace(FindText, ReplaceWith);
}
}
Expand Down
107 changes: 107 additions & 0 deletions Influxer/Config/Splitter.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
using System;
using System.Collections.Generic;
using System.Configuration;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

namespace AdysTech.Influxer.Config
{
/// <summary>
/// Selects how a <see cref="Splitter"/> breaks a column value into pieces.
/// </summary>
public enum SplitType
{
/// <summary>The value is split with a regular expression built from the splitter's Delimiter.</summary>
Delimited,
/// <summary>The value is split with SplitFixedWidth using the splitter's Width.</summary>
FixedWidth
}

/// <summary>
/// Configuration element that splits one column value into values for several sub columns,
/// either by a regular-expression delimiter or by a fixed width. Sub columns may themselves
/// declare a nested split.
/// </summary>
public class Splitter : ConfigurationElement, ISplit
{
    /// <summary>Split strategy: delimiter based or fixed width.</summary>
    [ConfigurationProperty ("Type")]
    public SplitType Type
    {
        get { return (SplitType) this["Type"]; }
        set { this["Type"] = value; }
    }

    /// <summary>Width passed to SplitFixedWidth when <see cref="Type"/> is FixedWidth.</summary>
    [ConfigurationProperty ("Width")]
    public int Width
    {
        get { return (int) this["Width"]; }
        set { this["Width"] = value; }
    }

    /// <summary>
    /// Delimiter used when <see cref="Type"/> is Delimited. It is compiled as a regular
    /// expression, so regex metacharacters are interpreted, not matched literally.
    /// </summary>
    [ConfigurationProperty ("Delimiter")]
    public string Delimiter
    {
        get { return (string) this["Delimiter"]; }
        set
        {
            this["Delimiter"] = value;
            // Drop the cached Regex so the next split uses the new delimiter.
            _splitPattern = null;
        }
    }

    /// <summary>Columns, in order, that receive the pieces produced by the split.</summary>
    [ConfigurationProperty ("SplitColumns")]
    public ColumnLayoutConfig SubColumnsConfig
    {
        get { return (ColumnLayoutConfig) this["SplitColumns"]; }
        set { this["SplitColumns"] = value; }
    }

    Regex _splitPattern;

    /// <summary>
    /// Lazily built, case-insensitive compiled <see cref="Regex"/> for <see cref="Delimiter"/>.
    /// Returns null while no pattern has been built, e.g. when the delimiter is blank.
    /// </summary>
    public Regex SplitPattern
    {
        get
        {
            if (Type == SplitType.Delimited && _splitPattern == null && !String.IsNullOrWhiteSpace (Delimiter))
            {
                _splitPattern = new Regex (Delimiter, RegexOptions.Compiled | RegexOptions.IgnoreCase);
            }
            return _splitPattern;
        }
    }

    /// <summary>
    /// All columns reachable from this splitter: the direct sub columns first, followed by the
    /// sub columns of every nested split, in declaration order.
    /// </summary>
    public IList<ColumnConfig> SubColumns
    {
        get
        {
            var ls = new List<ColumnConfig> (SubColumnsConfig.Count);
            ls.AddRange (SubColumnsConfig);
            // Skip columns without a split configuration: SelectMany throws if the
            // selector returns null.
            ls.AddRange (SubColumnsConfig
                .Where (t => t.SplitConfig != null)
                .SelectMany (t => t.SplitConfig.SubColumns));
            return ls;
        }
    }

    /// <summary>
    /// Tells whether <paramref name="content"/> can be split with the current settings.
    /// </summary>
    /// <param name="content">Value to test.</param>
    /// <returns>
    /// False for blank content. For FixedWidth, true when <see cref="Width"/> is positive and
    /// the content is longer than it. For Delimited, true when a delimiter pattern exists and
    /// matches the content.
    /// </returns>
    public bool CanSplit (string content)
    {
        if (String.IsNullOrWhiteSpace (content))
        {
            return false;
        }

        if (Type == SplitType.FixedWidth)
        {
            // A non-positive width (0 is the value when none is configured) cannot
            // describe a fixed-width layout.
            return Width > 0 && content.Length > Width;
        }

        // SplitPattern is null when the delimiter is blank; report "cannot split"
        // instead of failing with a NullReferenceException.
        Regex pattern = SplitPattern;
        return pattern != null && pattern.IsMatch (content);
    }

    /// <summary>
    /// Splits <paramref name="content"/> and assigns the pieces, by position, to the configured
    /// sub columns. A sub column with its own nested split contributes the results of that
    /// nested split instead of its own entry.
    /// </summary>
    /// <param name="content">Value to split.</param>
    /// <returns>The produced values keyed by the column they belong to.</returns>
    /// <exception cref="ArgumentOutOfRangeException">
    /// The split produced fewer pieces than there are configured sub columns.
    /// </exception>
    public Dictionary<ColumnConfig, string> Split (string content)
    {
        IList<string> values = (Type == SplitType.FixedWidth)
            ? content.SplitFixedWidth (Width).ToList ()
            : SplitPattern.Split (content).ToList ();

        int expected = SubColumnsConfig.Count;
        if (values.Count < expected)
        {
            // Same exception type the list indexer would raise, but with a message that
            // explains the mismatch.
            throw new ArgumentOutOfRangeException (nameof (content),
                String.Format ("Split produced {0} value(s) but {1} sub column(s) are configured", values.Count, expected));
        }

        var ret = new Dictionary<ColumnConfig, string> ();
        for (int i = 0; i < expected; i++)
        {
            ColumnConfig column = SubColumnsConfig[i];
            if (column.SplitConfig?.SubColumnsConfig?.Count > 0)
            {
                foreach (var nested in column.SplitConfig.Split (values[i]))
                {
                    ret.Add (nested.Key, nested.Value);
                }
            }
            else
            {
                ret.Add (column, values[i]);
            }
        }
        return ret;
    }
}
}
18 changes: 17 additions & 1 deletion Influxer/Config/TransformationCollection.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,29 @@ namespace AdysTech.Influxer.Config
/// <summary>
/// Collection of <see cref="ReplaceTransformation"/> elements, each added with a "Replace"
/// element. At most one entry may be marked as default.
/// </summary>
[ConfigurationCollection(typeof(ReplaceTransformation),AddItemName ="Replace", CollectionType = ConfigurationElementCollectionType.BasicMap)]
public class ReplaceTransformationCollection : ConfigurationElementCollection<ReplaceTransformation>
{
    /// <summary>
    /// Runs the base post-deserialization step, then rejects more than one default entry.
    /// </summary>
    /// <exception cref="ArgumentException">More than one entry is marked as default.</exception>
    protected override void PostDeserialize()
    {
        base.PostDeserialize();

        // Finding a second default entry is enough to reject the configuration.
        bool hasExtraDefault = this.Where(item => item.IsDefault).Skip(1).Any();
        if (hasExtraDefault)
        {
            throw new ArgumentException("Only one instance can be marked as Default");
        }
    }
}

/// <summary>
/// Collection of <see cref="ExtractTransformation"/> elements, each added with an "Extract"
/// element. At most one entry may be marked as default.
/// </summary>
[ConfigurationCollection(typeof(ExtractTransformation),AddItemName ="Extract", CollectionType = ConfigurationElementCollectionType.BasicMap)]
public class ExtractTransformationCollection : ConfigurationElementCollection<ExtractTransformation>
{
    /// <summary>
    /// Runs the base post-deserialization step, then rejects more than one default entry.
    /// </summary>
    /// <exception cref="ArgumentException">More than one entry is marked as default.</exception>
    protected override void PostDeserialize()
    {
        base.PostDeserialize();

        // Finding a second default entry is enough to reject the configuration.
        bool hasExtraDefault = this.Where(item => item.IsDefault).Skip(1).Any();
        if (hasExtraDefault)
        {
            throw new ArgumentException("Only one instance can be marked as Default");
        }
    }
}

/// <summary>
/// Collection of <see cref="FilterTransformation"/> elements, each added with a "Filter"
/// element. It adds no validation of its own.
/// </summary>
[ConfigurationCollection (typeof (FilterTransformation), AddItemName = "Filter", CollectionType = ConfigurationElementCollectionType.BasicMap)]
public class FilterTransformationCollection : ConfigurationElementCollection<FilterTransformation>
{

}
}

Loading

0 comments on commit 75f0e88

Please sign in to comment.