Commit 319096c: Merge pull request #1 from linkedin/master ("pull newest")
zondahuman authored Sep 18, 2017
2 parents: b775aeb + d11e934
Showing 26 changed files with 1,138 additions and 246 deletions.
README.md (1 addition, 1 deletion)
@@ -3,7 +3,7 @@

[![Join the chat at https://gitter.im/linkedin/databus](https://badges.gitter.im/Join%20Chat.svg)](https://gitter.im/linkedin/databus?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)

-In Internet architectures, data systems are typically categorized into source-of-truth systems that serve as primary stores for the user-generated writes, and derived data stores or indexes which serve reads and other complex queries. The data in these secondary stores is often derived from the primary data through custom transformations, sometimes involving complex processing driven by business logic. Similarly data in caching tiers is derived from reads against the primary data store, but needs to get invalidated or refreshed when the primary data gets mutated. A fundamental requirement emerging from these kinds of data architectures is the need to reliably capture, flow and process primary data changes.
+In Internet architectures, data systems are typically categorized into source-of-truth systems that serve as primary stores for the user-generated writes, and derived data stores or indexes which serve reads and other complex queries. The data in these secondary stores is often derived from the primary data through custom transformations, sometimes involving complex processing driven by business logic. Similarly, data in caching tiers is derived from reads against the primary data store, but needs to get invalidated or refreshed when the primary data gets mutated. A fundamental requirement emerging from these kinds of data architectures is the need to reliably capture, flow and process primary data changes.

We have built Databus, a source-agnostic distributed change data capture system, which is an integral part of LinkedIn's data processing pipeline. The Databus transport layer provides latencies in the low milliseconds and handles throughput of thousands of events per second per server while supporting infinite look back capabilities and rich subscription functionality.

NettyHttpDatabusRelayConnection.java
@@ -23,11 +23,13 @@
import java.io.InputStream;
import java.nio.channels.Channels;
import java.nio.channels.ClosedChannelException;
+import java.nio.charset.Charset;
import java.util.Formatter;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

+import org.apache.commons.io.IOUtils;
import org.apache.log4j.Logger;
import org.codehaus.jackson.map.ObjectMapper;
import org.codehaus.jackson.type.TypeReference;
@@ -51,6 +53,7 @@
import com.linkedin.databus.core.DbusPrettyLogUtils;
import com.linkedin.databus.core.async.ActorMessageQueue;
import com.linkedin.databus.core.data_model.PhysicalPartition;
+import com.linkedin.databus.core.util.CompressUtil;
import com.linkedin.databus.core.util.IdNamePair;
import com.linkedin.databus.core.util.Range;
import com.linkedin.databus2.core.container.DatabusHttpHeaders;
@@ -67,6 +70,7 @@ public class NettyHttpDatabusRelayConnection
{
public static final String MODULE = NettyHttpDatabusRelayConnection.class.getName();
public static final Logger LOG = Logger.getLogger(MODULE);
+public static final boolean needCompress = true;

private static enum State
{
@@ -239,7 +243,7 @@ private String createRegisterUrl()
uriString.append("&sources=")
.append(_sourcesSubsList);
}

uriString.append("&").append(DatabusHttpHeaders.PROTOCOL_COMPRESS_PARAM).append("=").append(needCompress);
final String url = uriString.toString();
return url;
}
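Since needCompress is a hardcoded compile-time constant rather than a configuration option, every /register request the client builds now advertises compression support. A hypothetical resulting URL (host, port, and source IDs are invented for illustration; other query parameters may also be present):

    http://relay.example.com:11115/register?sources=101,102&compress=true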
@@ -710,6 +714,19 @@ public void finishResponse() throws Exception
else
{
InputStream bodyStream = Channels.newInputStream(_decorated);
+String bodyStr = IOUtils.toString(bodyStream, Charset.defaultCharset().name());
+IOUtils.closeQuietly(bodyStream);
+if (NettyHttpDatabusRelayConnection.needCompress)
+{
+try
+{
+bodyStr = CompressUtil.uncompress(bodyStr);
+}
+catch (Exception e) // tolerated: the body may not actually be compressed
+{
+}
+}
+
ObjectMapper mapper = new ObjectMapper();
int registerResponseVersion = 3; // either 2 or 3 would suffice here; we care only about 4
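Note on the hunk above: because a relay may ignore the compress parameter, the client reads the entire response body into a string, optimistically attempts CompressUtil.uncompress(), and silently falls back to the raw string when decoding fails. The mapper.readValue() calls below therefore parse from bodyStr rather than from the body stream.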

@@ -734,7 +751,7 @@ public void finishResponse() throws Exception
if (registerResponseVersion == 4) // DDSDBUS-2009
{
HashMap<String, List<Object>> responseMap =
-mapper.readValue(bodyStream, new TypeReference<HashMap<String, List<Object>>>() {});
+mapper.readValue(bodyStr, new TypeReference<HashMap<String, List<Object>>>() {});

// Look for mandatory SOURCE_SCHEMAS_KEY.
Map<Long, List<RegisterResponseEntry>> sourcesSchemasMap = RegisterResponseEntry.createFromResponse(responseMap,
@@ -760,7 +777,7 @@ public void finishResponse() throws Exception
else // version 2 or 3
{
List<RegisterResponseEntry> schemasList =
-mapper.readValue(bodyStream, new TypeReference<List<RegisterResponseEntry>>() {});
+mapper.readValue(bodyStr, new TypeReference<List<RegisterResponseEntry>>() {});

Map<Long, List<RegisterResponseEntry>> sourcesSchemasMap = RegisterResponseEntry.convertSchemaListToMap(schemasList);

DatabusHttpHeaders.java
@@ -51,6 +51,7 @@ public class DatabusHttpHeaders

/** protocol version param name for /register request */
public static final String PROTOCOL_VERSION_PARAM = "protocolVersion";
+public static final String PROTOCOL_COMPRESS_PARAM = "compress";

/** max event version - max DbusEvent version client can understand */
public static final String MAX_EVENT_VERSION = "maxev";
databus-core/databus-core-impl/build.gradle (2 additions)
@@ -16,6 +16,8 @@ dependencies {
compile externalDependency.json
compile externalDependency.log4j
compile externalDependency.netty
+compile externalDependency.c3p0
+compile externalDependency.guava

testCompile externalDependency.testng
testCompile externalDependency.easymock
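The two new compile dependencies support the changes elsewhere in this pull request: guava supplies the BaseEncoding helper that the new CompressUtil class uses for base64, and c3p0 presumably provides JDBC connection pooling for the new MySQL-backed SCN handler.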
CompressUtil.java (new file, 37 additions)
@@ -0,0 +1,37 @@
package com.linkedin.databus.core.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;

import com.google.common.io.BaseEncoding;

public class CompressUtil
{
public static String compress(String str) throws IOException
{
ByteArrayOutputStream out = new ByteArrayOutputStream();
GZIPOutputStream gzip = new GZIPOutputStream(out);
gzip.write(str.getBytes(Charset.defaultCharset()));
gzip.close();
return BaseEncoding.base64().encode(out.toByteArray());
}

public static String uncompress(String str) throws IOException
{
byte[] encodeByteArr = BaseEncoding.base64().decode(str);
ByteArrayOutputStream out = new ByteArrayOutputStream();
ByteArrayInputStream in = new ByteArrayInputStream(encodeByteArr);
GZIPInputStream gunzip = new GZIPInputStream(in);
byte[] buffer = new byte[256];
int n;
while ((n = gunzip.read(buffer)) >= 0)
{
out.write(buffer, 0, n);
}
return out.toString(Charset.defaultCharset().name());
}
}
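A minimal round-trip sketch of the new utility (the payload is invented, and the calls must sit in a method that may throw IOException; note that both methods rely on the JVM default charset, so the compressing and uncompressing sides must agree on it):

    String original = "{\"source\":\"com.example.Person\"}";  // hypothetical payload
    String wire = CompressUtil.compress(original);            // gzip, then base64-encode
    String restored = CompressUtil.uncompress(wire);          // base64-decode, then gunzip
    assert restored.equals(original);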
MaxSCNReaderWriterConfig.java
@@ -27,13 +27,23 @@ public class MaxSCNReaderWriterConfig implements ConfigBuilder<MaxSCNReaderWrite

private String _type;
private FileMaxSCNHandler.Config _file;
+private MysqlMaxSCNHandler.Config _mysql;
private MaxSCNReaderWriter _existing;

public MaxSCNReaderWriterConfig()
{
_type = MaxSCNReaderWriterStaticConfig.Type.FILE.toString();
_existing = null;
_file = new FileMaxSCNHandler.Config();
+_mysql = new MysqlMaxSCNHandler.Config();
}

+public MysqlMaxSCNHandler.Config getMysql() {
+return _mysql;
+}
+
+public void setMysql(MysqlMaxSCNHandler.Config _mysql) {
+this._mysql = _mysql;
+}

public String getType()
@@ -84,7 +94,7 @@ public MaxSCNReaderWriterStaticConfig build() throws InvalidConfigException
throw new InvalidConfigException("No existing max scn reader/writer specified ");
}

-return new MaxSCNReaderWriterStaticConfig(handlerType, _file.build(), _existing);
+return new MaxSCNReaderWriterStaticConfig(handlerType, _file.build(), _mysql.build(), _existing);
}

}
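A hypothetical wiring sketch for the new MYSQL handler type (the JDBC settings are invented, and setType(...) is assumed to exist on the builder, since only getType() is visible in this diff):

    MaxSCNReaderWriterConfig config = new MaxSCNReaderWriterConfig();
    config.setType(MaxSCNReaderWriterStaticConfig.Type.MYSQL.toString());  // assumed setter
    config.getMysql().setJdbcUrl("jdbc:mysql://localhost:3306/databus");   // invented
    config.getMysql().setDbUser("databus");                                // invented
    config.getMysql().setDbPassword("secret");                             // invented
    config.getMysql().setScnTable("databus_scn");                          // invented
    MaxSCNReaderWriterStaticConfig staticConfig = config.build();
    SequenceNumberHandlerFactory factory = staticConfig.createFactory();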
MaxSCNReaderWriterStaticConfig.java
@@ -19,9 +19,8 @@
*/


-import org.apache.log4j.Logger;
-
import com.linkedin.databus2.core.seq.FileMaxSCNHandler.StaticConfig;
+import org.apache.log4j.Logger;

/**
* Static configuration for the SCN reader/writer
@@ -46,21 +45,25 @@ public enum Type
DISABLED,
FILE,
EXISTING,
-IN_MEMORY
+IN_MEMORY,
+MYSQL
}

private final Type _type;
private final FileMaxSCNHandler.StaticConfig _file;
+private final MysqlMaxSCNHandler.StaticConfig _mysql;
private final MaxSCNReaderWriter _existing;

public MaxSCNReaderWriterStaticConfig(Type type,
StaticConfig file,
+MysqlMaxSCNHandler.StaticConfig mysql,
MaxSCNReaderWriter existing)
{
super();
_type = type;
_file = file;
_existing = existing;
+_mysql = mysql;
}

/** Type of the MaxSCN handler */
@@ -135,6 +138,21 @@ public SequenceNumberHandlerFactory createFactory()
break;
case IN_MEMORY: result = new InMemorySequenceNumberHandlerFactory(-1); break;
case DISABLED: result = null; break;
+case MYSQL : {
+MysqlMaxSCNHandler.Config configBuilder = new MysqlMaxSCNHandler.Config();
+configBuilder.setJdbcUrl(_mysql.getJdbcUrl());
+configBuilder.setScnTable(_mysql.getScnTable());
+configBuilder.setDriverClass(_mysql.getDriverClass());
+configBuilder.setDbPassword(_mysql.getDbPassword());
+configBuilder.setDbUser(_mysql.getDbUser());
+configBuilder.setFlushItvl(_mysql.getFlushItvl());
+configBuilder.setInitVal(_mysql.getInitVal());
+configBuilder.setUpsertSCNQuery(_mysql.getUpsertSCNQuery());
+configBuilder.setGetSCNQuery(_mysql.getGetSCNQuery());
+configBuilder.setScnColumnName(_mysql.getScnColumnName());
+
+result = new MysqlMaxSCNHandlerFactory(configBuilder);
+} break;
default: throw new RuntimeException("unknown scn reader/writer type: " + _type.toString());
}

(Diffs for the remaining 19 changed files are not shown.)