-
Notifications
You must be signed in to change notification settings - Fork 117
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Imagenet Pipeline #120
Imagenet Pipeline #120
Changes from 10 commits
222ac0c
502b06e
6d11876
8ef4b58
c9e21ed
ae8e72c
8564ae0
f8b7718
71c1431
5416570
7e93dc7
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,9 +8,13 @@ import pipelines.{FunctionNode, Transformer} | |
* Given a collection of Dense Matrices, this will generate a sample of `numSamples` columns from the entire set. | ||
* @param numSamples | ||
*/ | ||
class ColumnSampler(numSamples: Int) extends Transformer[DenseMatrix[Float], DenseVector[Float]] { | ||
override def apply(in: RDD[DenseMatrix[Float]]): RDD[DenseVector[Float]] = { | ||
val numImgs = in.count.toInt | ||
class ColumnSampler( | ||
numSamples: Int, | ||
numImgsOpt: Option[Int] = None) | ||
extends FunctionNode[RDD[DenseMatrix[Float]], RDD[DenseVector[Float]]] { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is this There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Never mind, I get it. |
||
|
||
def apply(in: RDD[DenseMatrix[Float]]): RDD[DenseVector[Float]] = { | ||
val numImgs = numImgsOpt.getOrElse(in.count.toInt) | ||
val samplesPerImage = numSamples/numImgs | ||
|
||
in.flatMap(mat => { | ||
|
@@ -20,7 +24,6 @@ class ColumnSampler(numSamples: Int) extends Transformer[DenseMatrix[Float], Den | |
}) | ||
} | ||
|
||
def apply(in: DenseMatrix[Float]): DenseVector[Float] = ??? | ||
} | ||
|
||
/** | ||
|
@@ -31,4 +34,4 @@ class Sampler[T](val size: Int, val seed: Int = 42) extends FunctionNode[RDD[T], | |
def apply(in: RDD[T]): Array[T] = { | ||
in.takeSample(false, size, seed) | ||
} | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,6 @@ | ||
package nodes.stats | ||
|
||
import breeze.linalg.DenseVector | ||
import breeze.linalg.{DenseVector, DenseMatrix} | ||
import breeze.numerics._ | ||
import pipelines.Transformer | ||
|
||
|
@@ -13,4 +13,10 @@ object SignedHellingerMapper extends Transformer[DenseVector[Double], DenseVecto | |
def apply(in: DenseVector[Double]): DenseVector[Double] = { | ||
signum(in) :* sqrt(abs(in)) | ||
} | ||
} | ||
} | ||
|
||
object BatchSignedHellingerMapper extends Transformer[DenseMatrix[Float], DenseMatrix[Float]] { | ||
def apply(in: DenseMatrix[Float]): DenseMatrix[Float] = { | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I see a pattern emerging - perhaps we want to support Vector or Matrix in NumericTransformer? |
||
in.map(x => (math.signum(x) * math.sqrt(math.abs(x))).toFloat) | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it a problem to have a single version of these with `None`, or does it break the `Estimator` API?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, I tried it and it breaks the API.