diff --git a/README.md b/README.md index b6a39720c..6ceeb2d35 100644 --- a/README.md +++ b/README.md @@ -10,16 +10,17 @@ # 已支持 * 源表:kafka 0.9,1.x版本 - * 维表:mysql,hbase,mongo,redis - * 结果表:mysql,hbase,elasticsearch5.x,mongo,redis + * 维表:mysql,SQlServer,oracle,hbase,mongo,redis,cassandra + * 结果表:mysql,SQlServer,oracle,hbase,elasticsearch5.x,mongo,redis,cassandra,console # 后续开发计划 - * 增加oracle维表,结果表功能 - * 增加SQlServer维表,结果表功能 - * 增加kafka结果表功能 * 增加SQL支持CEP * 维表快照 * sql优化(谓词下移等) + * serverSocket 源表 + * console 结果表 + * kafka avro格式 + * topN ## 1 快速起步 ### 1.1 运行模式 @@ -145,6 +146,7 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack ## 2 结构 ### 2.1 源表插件 * [kafka 源表插件](docs/kafkaSource.md) +* [serverSocket 源表插件](docs/serverSocketSource.md) ### 2.2 结果表插件 * [elasticsearch 结果表插件](docs/elasticsearchSink.md) @@ -152,12 +154,15 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack * [mysql 结果表插件](docs/mysqlSink.md) * [mongo 结果表插件](docs/mongoSink.md) * [redis 结果表插件](docs/redisSink.md) +* [cassandra 结果表插件](docs/cassandraSink.md) +* [console 结果表插件](docs/consoleSink.md) ### 2.3 维表插件 * [hbase 维表插件](docs/hbaseSide.md) * [mysql 维表插件](docs/mysqlSide.md) * [mongo 维表插件](docs/mongoSide.md) * [redis 维表插件](docs/redisSide.md) +* [cassandra 维表插件](docs/cassandraSide.md) ## 3 性能指标(新增) @@ -188,7 +193,7 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack ``` -CREATE (scala|table) FUNCTION CHARACTER_LENGTH WITH com.dtstack.Kun +CREATE (scala|table) FUNCTION CHARACTER_LENGTH WITH com.dtstack.Kun; CREATE TABLE MyTable( diff --git a/cassandra/cassandra-side/cassandra-all-side/pom.xml b/cassandra/cassandra-side/cassandra-all-side/pom.xml new file mode 100644 index 000000000..74c62afdb --- /dev/null +++ b/cassandra/cassandra-side/cassandra-all-side/pom.xml @@ -0,0 +1,88 @@ + + + + sql.side.cassandra + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.all.cassandra + cassandra-all-side + + jar + + + 
+ com.dtstack.flink + sql.side.cassandra.core + 1.0-SNAPSHOT + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java b/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java new file mode 100644 index 000000000..3a7e56902 --- /dev/null +++ b/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java @@ -0,0 +1,305 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
package com.dtstack.flink.sql.side.cassandra;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.HostDistance;
import com.datastax.driver.core.PoolingOptions;
import com.datastax.driver.core.QueryOptions;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.SocketOptions;
import com.datastax.driver.core.policies.DowngradingConsistencyRetryPolicy;
import com.datastax.driver.core.policies.RetryPolicy;
import com.dtstack.flink.sql.side.AllReqRow;
import com.dtstack.flink.sql.side.FieldInfo;
import com.dtstack.flink.sql.side.JoinInfo;
import com.dtstack.flink.sql.side.SideTableInfo;
import com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo;
import org.apache.calcite.sql.JoinType;
import org.apache.commons.collections.CollectionUtils;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.calcite.shaded.com.google.common.collect.Lists;
import org.apache.flink.calcite.shaded.com.google.common.collect.Maps;
import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo;
import org.apache.flink.types.Row;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetAddress;
import java.sql.SQLException;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Side-table join function that joins each input row against an in-memory snapshot of a
 * Cassandra table.
 *
 * <p>The snapshot is a map from a join key (the equal-condition values joined with "_") to the
 * list of side rows carrying that key. It is filled by {@link #initCache()} and swapped for a
 * fresh copy by {@link #reloadCache()}.
 *
 * <p>NOTE(review): when these two hooks are invoked is decided by {@code AllReqRow}, which is not
 * part of this file.
 *
 * Date: 2018/11/22
 *
 * @author xuqianjin
 */
public class CassandraAllReqRow extends AllReqRow {

    private static final long serialVersionUID = 54015343561288219L;

    private static final Logger LOG = LoggerFactory.getLogger(CassandraAllReqRow.class);

    private static final String cassandra_DRIVER = "com.cassandra.jdbc.Driver";

    /** Maximum number of connection attempts made by one cache load. */
    private static final int CONN_RETRY_NUM = 3;

    /**
     * Row cap appended to the load query as {@code limit FETCH_SIZE}.
     * NOTE(review): this limits the whole cached table to 1000 rows, not a page size - confirm
     * that this is intended.
     */
    private static final int FETCH_SIZE = 1000;

    /** Native-protocol port used when a contact point is given without a ":port" suffix. */
    private static final int DEFAULT_CASSANDRA_PORT = 9042;

    private transient Cluster cluster;
    private transient Session session = null;

    /** Current snapshot: join key -> side rows (field name -> value). Replaced atomically on reload. */
    private AtomicReference<Map<String, List<Map<String, Object>>>> cacheRef = new AtomicReference<>();

    public CassandraAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List<FieldInfo> outFieldInfoList, SideTableInfo sideTableInfo) {
        super(new com.dtstack.flink.sql.side.cassandra.CassandraAllSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo));
    }

    /**
     * Builds one output row from a stream row and (optionally) one cached side row.
     *
     * @param input     the stream row
     * @param sideInput the cached side row as a field-name to value map, or {@code null} to fill
     *                  every side column with {@code null}
     * @return the joined row, sized to the output field list
     */
    @Override
    @SuppressWarnings("unchecked")
    public Row fillData(Row input, Object sideInput) {
        Map<String, Object> cacheInfo = (Map<String, Object>) sideInput;
        Row row = new Row(sideInfo.getOutFieldInfoList().size());
        for (Map.Entry<Integer, Integer> entry : sideInfo.getInFieldIndex().entrySet()) {
            Object obj = input.getField(entry.getValue());
            boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass());

            // A time-indicator field behaves like a SQL timestamp but is serialized as Long,
            // so hand the epoch millis downstream.
            if (obj instanceof Timestamp && isTimeIndicatorTypeInfo) {
                obj = ((Timestamp) obj).getTime();
            }
            row.setField(entry.getKey(), obj);
        }

        for (Map.Entry<Integer, String> entry : sideInfo.getSideFieldNameIndex().entrySet()) {
            if (cacheInfo == null) {
                row.setField(entry.getKey(), null);
            } else {
                row.setField(entry.getKey(), cacheInfo.get(entry.getValue()));
            }
        }

        return row;
    }

    /**
     * Loads the first snapshot. The cache reference is only published once the load succeeded.
     *
     * @throws SQLException if the side table cannot be read
     */
    @Override
    protected void initCache() throws SQLException {
        Map<String, List<Map<String, Object>>> newCache = Maps.newConcurrentMap();
        loadData(newCache);
        cacheRef.set(newCache);
    }

    /**
     * Loads a fresh snapshot and swaps it in. If the load fails the previous snapshot stays in
     * place instead of being replaced by an empty map.
     */
    @Override
    protected void reloadCache() {
        Map<String, List<Map<String, Object>>> newCache = Maps.newConcurrentMap();
        try {
            loadData(newCache);
        } catch (SQLException e) {
            LOG.error("cassandra all cacheRef reload failed, keep using the previous cache", e);
            return;
        }

        cacheRef.set(newCache);
        LOG.info("----- cassandra all cacheRef reload end:{}", Calendar.getInstance());
    }

    /**
     * Joins one stream row against the snapshot and emits one output row per matching side row.
     * Without a match (or with a null join key) a LEFT join emits the stream row padded with
     * nulls; any other join type emits nothing.
     */
    @Override
    public void flatMap(Row value, Collector<Row> out) throws Exception {
        List<Object> inputParams = Lists.newArrayList();
        for (Integer conValIndex : sideInfo.getEqualValIndex()) {
            Object equalObj = value.getField(conValIndex);
            if (equalObj == null) {
                // A null key can never match; treat it as a miss instead of emitting a null record.
                if (sideInfo.getJoinType() == JoinType.LEFT) {
                    out.collect(fillData(value, null));
                }
                return;
            }

            inputParams.add(equalObj);
        }

        String key = buildKey(inputParams);
        List<Map<String, Object>> cacheList = cacheRef.get().get(key);
        if (CollectionUtils.isEmpty(cacheList)) {
            if (sideInfo.getJoinType() == JoinType.LEFT) {
                out.collect(fillData(value, null));
            }
            return;
        }

        for (Map<String, Object> one : cacheList) {
            out.collect(fillData(value, one));
        }
    }

    /** Builds the cache key for a stream row from its join values, each followed by "_". */
    private String buildKey(List<Object> equalValList) {
        StringBuilder sb = new StringBuilder("");
        for (Object equalVal : equalValList) {
            sb.append(equalVal).append("_");
        }

        return sb.toString();
    }

    /** Builds the cache key for a side row from its join columns, each followed by "_". */
    private String buildKey(Map<String, Object> val, List<String> equalFieldList) {
        StringBuilder sb = new StringBuilder("");
        for (String equalField : equalFieldList) {
            sb.append(val.get(equalField)).append("_");
        }

        return sb.toString();
    }

    /** Returns {@code value}, or {@code fallback} when the option was not configured. */
    private static int orDefault(Integer value, int fallback) {
        return value == null ? fallback : value;
    }

    /**
     * Opens a new cluster and session for the configured side table. Any connection still held
     * by this instance is closed first.
     *
     * @param tableInfo connection settings of the side table
     * @return a session connected to the configured keyspace
     * @throws IllegalStateException if the cluster cannot be built or the connect fails
     */
    private Session getConn(CassandraSideTableInfo tableInfo) {
        closeConn();
        String address = tableInfo.getAddress();
        try {
            QueryOptions queryOptions = new QueryOptions();
            queryOptions.setConsistencyLevel(ConsistencyLevel.QUORUM);

            String userName = tableInfo.getUserName();
            String password = tableInfo.getPassword();
            String database = tableInfo.getDatabase();

            // Read and connect timeouts, 60 seconds each unless configured.
            SocketOptions so = new SocketOptions()
                    .setReadTimeoutMillis(orDefault(tableInfo.getReadTimeoutMillis(), 60000))
                    .setConnectTimeoutMillis(orDefault(tableInfo.getConnectTimeoutMillis(), 60000));

            // Pool limits apply to hosts at HostDistance.LOCAL only.
            PoolingOptions poolingOptions = new PoolingOptions()
                    .setMaxRequestsPerConnection(HostDistance.LOCAL, orDefault(tableInfo.getMaxRequestsPerConnection(), 1))
                    .setCoreConnectionsPerHost(HostDistance.LOCAL, orDefault(tableInfo.getCoreConnectionsPerHost(), 8))
                    .setMaxConnectionsPerHost(HostDistance.LOCAL, orDefault(tableInfo.getMaxConnectionsPerHost(), 32768))
                    .setMaxQueueSize(orDefault(tableInfo.getMaxQueueSize(), 100000))
                    .setPoolTimeoutMillis(orDefault(tableInfo.getPoolTimeoutMillis(), 60000));

            RetryPolicy retryPolicy = DowngradingConsistencyRetryPolicy.INSTANCE;

            // The address is a comma separated list of host[:port]. The cluster takes a single
            // port, so the last explicit port wins.
            int cassandraPort = DEFAULT_CASSANDRA_PORT;
            List<InetAddress> serversList = new ArrayList<>();
            for (String server : address.split(",")) {
                String[] hostAndPort = server.trim().split(":");
                serversList.add(InetAddress.getByName(hostAndPort[0]));
                if (hostAndPort.length > 1) {
                    cassandraPort = Integer.parseInt(hostAndPort[1].trim());
                }
            }

            Cluster.Builder builder = Cluster.builder()
                    .addContactPoints(serversList)
                    .withRetryPolicy(retryPolicy)
                    .withPort(cassandraPort)
                    .withPoolingOptions(poolingOptions)
                    .withSocketOptions(so)
                    .withQueryOptions(queryOptions);
            boolean hasCredentials = userName != null && !userName.isEmpty()
                    && password != null && !password.isEmpty();
            if (hasCredentials) {
                builder.withCredentials(userName, password);
            }
            cluster = builder.build();

            // Connect to the existing keyspace.
            session = cluster.connect(database);
            LOG.info("connect cassandra is successed!");
            return session;
        } catch (Exception e) {
            closeConn();
            throw new IllegalStateException("connect cassandra is error, address:" + address, e);
        }
    }

    /** Tries {@link #getConn} up to {@link #CONN_RETRY_NUM} times, waiting 5 seconds between attempts. */
    private Session connectWithRetry(CassandraSideTableInfo tableInfo) {
        for (int attempt = 1; ; attempt++) {
            try {
                return getConn(tableInfo);
            } catch (RuntimeException e) {
                if (attempt >= CONN_RETRY_NUM) {
                    throw e;
                }
                // The password is deliberately left out of the log line.
                LOG.warn("get conn fail, wait for 5 sec and try again, connInfo:address:{};userName:{}",
                        tableInfo.getAddress(), tableInfo.getUserName(), e);
                try {
                    Thread.sleep(5 * 1000);
                } catch (InterruptedException interrupted) {
                    Thread.currentThread().interrupt();
                    throw new IllegalStateException("interrupted while waiting to reconnect to cassandra", interrupted);
                }
            }
        }
    }

    /** Closes the session and cluster if present and clears both fields so the next load reconnects. */
    private void closeConn() {
        try {
            if (session != null) {
                session.close();
            }
        } catch (Exception e) {
            LOG.error("Error while closing session.", e);
        } finally {
            session = null;
        }
        try {
            if (cluster != null) {
                cluster.close();
            }
        } catch (Exception e) {
            LOG.error("Error while closing cluster.", e);
        } finally {
            cluster = null;
        }
    }

    /**
     * Reads the side table and groups its rows by join key into {@code tmpCache}.
     *
     * @param tmpCache the map to fill: join key -> rows (selected field name -> value)
     * @throws SQLException if connecting or querying fails; the cause is attached
     */
    private void loadData(Map<String, List<Map<String, Object>>> tmpCache) throws SQLException {
        CassandraSideTableInfo tableInfo = (CassandraSideTableInfo) sideInfo.getSideTableInfo();

        try {
            Session conn = connectWithRetry(tableInfo);

            // load data from table
            String sql = sideInfo.getSqlCondition() + " limit " + FETCH_SIZE;
            ResultSet resultSet = conn.execute(sql);
            String[] sideFieldNames = sideInfo.getSideSelectFields().split(",");
            for (com.datastax.driver.core.Row row : resultSet) {
                Map<String, Object> oneRow = Maps.newHashMap();
                for (String fieldName : sideFieldNames) {
                    oneRow.put(fieldName.trim(), row.getObject(fieldName.trim()));
                }
                String cacheKey = buildKey(oneRow, sideInfo.getEqualFieldList());
                List<Map<String, Object>> list = tmpCache.computeIfAbsent(cacheKey, key -> Lists.newArrayList());
                list.add(oneRow);
            }
        } catch (Exception e) {
            throw new SQLException("load cassandra side table data failed, address:" + tableInfo.getAddress(), e);
        } finally {
            closeConn();
        }
    }
}
package com.dtstack.flink.sql.side.cassandra;

import com.dtstack.flink.sql.side.FieldInfo;
import com.dtstack.flink.sql.side.JoinInfo;
import com.dtstack.flink.sql.side.SideInfo;
import com.dtstack.flink.sql.side.SideTableInfo;
import com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo;
import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.commons.collections.CollectionUtils;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.calcite.shaded.com.google.common.collect.Lists;

import java.util.List;

/**
 * Join metadata for the full-cache Cassandra side table: which columns to select, how output
 * positions map to stream or side fields, and the CQL used to load the table.
 *
 * Date: 2018/11/22
 *
 * @author xuqianjin
 */
public class CassandraAllSideInfo extends SideInfo {

    private static final long serialVersionUID = -8690814317653033557L;

    public CassandraAllSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List<FieldInfo> outFieldInfoList, SideTableInfo sideTableInfo) {
        super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo);
    }

    /**
     * Builds the load statement {@code select <fields> from <keyspace>.<table> } (no where
     * clause, trailing space kept) from the already parsed select fields.
     */
    @Override
    public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) {
        CassandraSideTableInfo cassandraSideTableInfo = (CassandraSideTableInfo) sideTableInfo;
        String qualifiedTable = cassandraSideTableInfo.getDatabase() + "." + cassandraSideTableInfo.getTableName();

        sqlCondition = "select ${selectField} from ${tableName} "
                .replace("${tableName}", qualifiedTable)
                .replace("${selectField}", sideSelectFields);
        System.out.println("---------side_exe_sql-----\n" + sqlCondition);
    }

    /**
     * Splits the output fields into stream fields and side fields, resolves the equi-join
     * columns and assembles the comma separated select list of the side table.
     *
     * @throws RuntimeException if an output field belongs to neither table, no side field is
     *                          selected, or the join has no equal condition
     */
    @Override
    public void parseSelectFields(JoinInfo joinInfo) {
        String sideTableName = joinInfo.getSideTableName();
        String nonSideTableName = joinInfo.getNonSideTable();
        List<String> fields = Lists.newArrayList();

        int sideIndex = 0;
        for (int i = 0; i < outFieldInfoList.size(); i++) {
            FieldInfo fieldInfo = outFieldInfoList.get(i);
            String owner = fieldInfo.getTable();
            if (owner.equalsIgnoreCase(sideTableName)) {
                // Output position i is filled from the side row.
                fields.add(fieldInfo.getFieldName());
                sideFieldIndex.put(i, sideIndex);
                sideFieldNameIndex.put(i, fieldInfo.getFieldName());
                sideIndex++;
            } else if (owner.equalsIgnoreCase(nonSideTableName)) {
                // Output position i is copied from the stream row.
                int nonSideIndex = rowTypeInfo.getFieldIndex(fieldInfo.getFieldName());
                inFieldIndex.put(i, nonSideIndex);
            } else {
                throw new RuntimeException("unknown table " + fieldInfo.getTable());
            }
        }

        if (fields.isEmpty()) {
            throw new RuntimeException("select non field from table " + sideTableName);
        }

        // The join condition is either a single comparison or an AND of comparisons.
        SqlNode conditionNode = joinInfo.getCondition();

        List<SqlNode> sqlNodeList = Lists.newArrayList();
        if (conditionNode.getKind() == SqlKind.AND) {
            sqlNodeList.addAll(Lists.newArrayList(((SqlBasicCall) conditionNode).getOperands()));
        } else {
            sqlNodeList.add(conditionNode);
        }

        for (SqlNode sqlNode : sqlNodeList) {
            dealOneEqualCon(sqlNode, sideTableName);
        }

        if (CollectionUtils.isEmpty(equalFieldList)) {
            throw new RuntimeException("no join condition found after table " + joinInfo.getLeftTableName());
        }

        // The join columns must be selected too, because the cache key is built from them.
        for (String equalField : equalFieldList) {
            if (!fields.contains(equalField)) {
                fields.add(equalField);
            }
        }

        sideSelectFields = String.join(",", fields);
    }
}
100644 index 000000000..cd709fecd --- /dev/null +++ b/cassandra/cassandra-side/cassandra-async-side/pom.xml @@ -0,0 +1,103 @@ + + + + sql.side.cassandra + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.async.cassandra + + cassandra-async-side + + jar + + + + + + io.vertx + vertx-jdbc-client + 3.5.2 + + + + io.vertx + vertx-core + 3.5.2 + + + + com.dtstack.flink + sql.side.cassandra.core + 1.0-SNAPSHOT + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java b/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java new file mode 100644 index 000000000..94c8e6fb6 --- /dev/null +++ b/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java @@ -0,0 +1,309 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
package com.dtstack.flink.sql.side.cassandra;

import com.datastax.driver.core.Cluster;
import com.datastax.driver.core.ConsistencyLevel;
import com.datastax.driver.core.HostDistance;
import com.datastax.driver.core.PoolingOptions;
import com.datastax.driver.core.QueryOptions;
import com.datastax.driver.core.ResultSet;
import com.datastax.driver.core.Session;
import com.datastax.driver.core.SocketOptions;
import com.datastax.driver.core.policies.DowngradingConsistencyRetryPolicy;
import com.datastax.driver.core.policies.RetryPolicy;
import com.dtstack.flink.sql.enums.ECacheContentType;
import com.dtstack.flink.sql.side.AsyncReqRow;
import com.dtstack.flink.sql.side.CacheMissVal;
import com.dtstack.flink.sql.side.FieldInfo;
import com.dtstack.flink.sql.side.JoinInfo;
import com.dtstack.flink.sql.side.SideTableInfo;
import com.dtstack.flink.sql.side.cache.CacheObj;
import com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo;
import com.google.common.base.Function;
import com.google.common.util.concurrent.AsyncFunction;
import com.google.common.util.concurrent.FutureCallback;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import io.vertx.core.json.JsonArray;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.streaming.api.functions.async.ResultFuture;
import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo;
import org.apache.flink.types.Row;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.net.InetAddress;
import java.sql.Timestamp;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;

/**
 * Asynchronous side-table join against Cassandra: every stream row triggers (at most) one
 * lookup by its join key, optionally served from the cache offered by {@code AsyncReqRow}.
 *
 * <p>One cluster and one session future are opened in {@link #open} and shared by all lookups
 * until {@link #close}.
 *
 * Date: 2018/11/22
 *
 * @author xuqianjin
 */
public class CassandraAsyncReqRow extends AsyncReqRow {

    private static final long serialVersionUID = 6631584128079864735L;

    private static final Logger LOG = LoggerFactory.getLogger(CassandraAsyncReqRow.class);

    private final static int DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE = 10;

    private final static int DEFAULT_VERTX_WORKER_POOL_SIZE = 20;

    private final static int DEFAULT_MAX_DB_CONN_POOL_SIZE = 20;

    /** Native-protocol port used when a contact point is given without a ":port" suffix. */
    private static final int DEFAULT_CASSANDRA_PORT = 9042;

    private transient Cluster cluster;
    private transient ListenableFuture<Session> session;
    private transient CassandraSideTableInfo cassandraSideTableInfo;

    public CassandraAsyncReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List<FieldInfo> outFieldInfoList, SideTableInfo sideTableInfo) {
        super(new com.dtstack.flink.sql.side.cassandra.CassandraAsyncSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo));
    }


    /** Opens the shared Cassandra connection; fails the operator start if that is not possible. */
    @Override
    public void open(Configuration parameters) throws Exception {
        super.open(parameters);
        cassandraSideTableInfo = (CassandraSideTableInfo) sideInfo.getSideTableInfo();
        connCassandraDB(cassandraSideTableInfo);
    }

    /** Returns {@code value}, or {@code fallback} when the option was not configured. */
    private static int orDefault(Integer value, int fallback) {
        return value == null ? fallback : value;
    }

    /**
     * Builds the cluster and starts connecting the session, unless that already happened.
     *
     * @param tableInfo connection settings of the side table
     * @throws IllegalStateException if the cluster cannot be built or the connect cannot start
     */
    private void connCassandraDB(CassandraSideTableInfo tableInfo) {
        if (session != null) {
            return;
        }
        String address = tableInfo.getAddress();
        try {
            QueryOptions queryOptions = new QueryOptions();
            queryOptions.setConsistencyLevel(ConsistencyLevel.QUORUM);

            String userName = tableInfo.getUserName();
            String password = tableInfo.getPassword();
            String database = tableInfo.getDatabase();

            // Read and connect timeouts, 60 seconds each unless configured.
            SocketOptions so = new SocketOptions()
                    .setReadTimeoutMillis(orDefault(tableInfo.getReadTimeoutMillis(), 60000))
                    .setConnectTimeoutMillis(orDefault(tableInfo.getConnectTimeoutMillis(), 60000));

            // Pool limits apply to hosts at HostDistance.LOCAL only.
            PoolingOptions poolingOptions = new PoolingOptions()
                    .setMaxRequestsPerConnection(HostDistance.LOCAL, orDefault(tableInfo.getMaxRequestsPerConnection(), 1))
                    .setCoreConnectionsPerHost(HostDistance.LOCAL, orDefault(tableInfo.getCoreConnectionsPerHost(), 8))
                    .setMaxConnectionsPerHost(HostDistance.LOCAL, orDefault(tableInfo.getMaxConnectionsPerHost(), 32768))
                    .setMaxQueueSize(orDefault(tableInfo.getMaxQueueSize(), 100000))
                    .setPoolTimeoutMillis(orDefault(tableInfo.getPoolTimeoutMillis(), 60000));

            RetryPolicy retryPolicy = DowngradingConsistencyRetryPolicy.INSTANCE;

            // The address is a comma separated list of host[:port]. The cluster takes a single
            // port, so the last explicit port wins.
            int cassandraPort = DEFAULT_CASSANDRA_PORT;
            List<InetAddress> serversList = new ArrayList<>();
            for (String server : address.split(",")) {
                String[] hostAndPort = server.trim().split(":");
                serversList.add(InetAddress.getByName(hostAndPort[0]));
                if (hostAndPort.length > 1) {
                    cassandraPort = Integer.parseInt(hostAndPort[1].trim());
                }
            }

            Cluster.Builder builder = Cluster.builder()
                    .addContactPoints(serversList)
                    .withRetryPolicy(retryPolicy)
                    .withPort(cassandraPort)
                    .withPoolingOptions(poolingOptions)
                    .withSocketOptions(so)
                    .withQueryOptions(queryOptions);
            boolean hasCredentials = userName != null && !userName.isEmpty()
                    && password != null && !password.isEmpty();
            if (hasCredentials) {
                builder.withCredentials(userName, password);
            }
            cluster = builder.build();

            // Connect to the existing keyspace; the future completes once the session is usable.
            session = cluster.connectAsync(database);
            LOG.info("connect cassandra is successed!");
        } catch (Exception e) {
            if (cluster != null) {
                cluster.closeAsync();
                cluster = null;
            }
            session = null;
            throw new IllegalStateException("connect cassandra is error, address:" + address, e);
        }
    }

    /**
     * Looks up the side rows for one stream row and completes {@code resultFuture} exactly once
     * with all joined rows.
     *
     * <p>A null join key or an empty result is handed to {@code dealMissKey}. A query failure is
     * logged and completes the future with no rows, so the stream row is dropped rather than
     * failing the job.
     *
     * <p>The join values are sent as bind values, so their Java types must match the Cassandra
     * column types.
     *
     * @throws RuntimeException if the cache holds an entry of an unsupported content type
     */
    @Override
    public void asyncInvoke(Row input, ResultFuture<Row> resultFuture) throws Exception {

        JsonArray inputParams = new JsonArray();
        List<Object> bindValues = new ArrayList<>();
        List<String> predicates = new ArrayList<>();

        for (int i = 0; i < sideInfo.getEqualFieldList().size(); i++) {
            Integer conValIndex = sideInfo.getEqualValIndex().get(i);
            Object equalObj = input.getField(conValIndex);
            if (equalObj == null) {
                // A null key can never match: handle it as a miss and stop here.
                dealMissKey(input, resultFuture);
                return;
            }
            inputParams.add(equalObj);
            bindValues.add(equalObj);
            predicates.add(sideInfo.getEqualFieldList().get(i) + " = ?");
        }

        final String key = buildCacheKey(inputParams);

        if (openCache()) {
            CacheObj val = getFromCache(key);
            if (val != null) {
                if (ECacheContentType.MissVal == val.getType()) {
                    dealMissKey(input, resultFuture);
                } else if (ECacheContentType.MultiLine == val.getType()) {
                    // The future accepts a single completion, so gather every joined row first.
                    List<Row> joined = new ArrayList<>();
                    for (Object cachedLine : (List<?>) val.getContent()) {
                        joined.add(fillData(input, cachedLine));
                    }
                    resultFuture.complete(joined);
                } else {
                    throw new RuntimeException("not support cache obj type " + val.getType());
                }
                return;
            }
        }

        // No-op when the shared connection from open() is still in place.
        connCassandraDB(cassandraSideTableInfo);

        final String sqlCondition = sideInfo.getSqlCondition() + " where " + String.join(" and ", predicates);
        final Object[] values = bindValues.toArray();
        LOG.debug("sqlCondition:{}", sqlCondition);

        ListenableFuture<ResultSet> resultSet = Futures.transformAsync(session,
                new AsyncFunction<Session, ResultSet>() {
                    @Override
                    public ListenableFuture<ResultSet> apply(Session connected) throws Exception {
                        return connected.executeAsync(sqlCondition, values);
                    }
                });

        ListenableFuture<List<com.datastax.driver.core.Row>> data = Futures.transform(resultSet,
                new Function<ResultSet, List<com.datastax.driver.core.Row>>() {
                    @Override
                    public List<com.datastax.driver.core.Row> apply(ResultSet rs) {
                        return rs.all();
                    }
                });

        Futures.addCallback(data, new FutureCallback<List<com.datastax.driver.core.Row>>() {
            @Override
            public void onSuccess(List<com.datastax.driver.core.Row> rows) {
                if (rows == null || rows.isEmpty()) {
                    dealMissKey(input, resultFuture);
                    if (openCache()) {
                        putCache(key, CacheMissVal.getMissKeyObj());
                    }
                    return;
                }

                List<Row> joined = new ArrayList<>(rows.size());
                for (com.datastax.driver.core.Row line : rows) {
                    joined.add(fillData(input, line));
                }
                if (openCache()) {
                    List<com.datastax.driver.core.Row> cacheContent = Lists.newArrayList(rows);
                    putCache(key, CacheObj.buildCacheObj(ECacheContentType.MultiLine, cacheContent));
                }
                resultFuture.complete(joined);
            }

            @Override
            public void onFailure(Throwable t) {
                // The cluster is shared by all lookups, so it is not closed here.
                LOG.error("Failed to retrieve the data, sql:{}", sqlCondition, t);
                resultFuture.complete(null);
            }
        });
    }

    /**
     * Builds one output row from a stream row and (optionally) one Cassandra row.
     *
     * @param input the stream row
     * @param line  a {@code com.datastax.driver.core.Row}, or {@code null} to fill every side
     *              column with {@code null}
     * @return the joined row, sized to the output field list
     */
    @Override
    public Row fillData(Row input, Object line) {
        com.datastax.driver.core.Row rowArray = (com.datastax.driver.core.Row) line;
        Row row = new Row(sideInfo.getOutFieldInfoList().size());
        for (Map.Entry<Integer, Integer> entry : sideInfo.getInFieldIndex().entrySet()) {
            Object obj = input.getField(entry.getValue());
            boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass());

            // A time-indicator field is serialized as Long, so hand the epoch millis downstream.
            if (obj instanceof Timestamp && isTimeIndicatorTypeInfo) {
                obj = ((Timestamp) obj).getTime();
            }

            row.setField(entry.getKey(), obj);
        }

        for (Map.Entry<Integer, Integer> entry : sideInfo.getSideFieldIndex().entrySet()) {
            if (rowArray == null) {
                row.setField(entry.getKey(), null);
            } else {
                row.setField(entry.getKey(), rowArray.getObject(entry.getValue()));
            }
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("row:{}", row);
        }
        return row;
    }

    /** Closes the shared cluster (and with it the session) and clears both fields. */
    @Override
    public void close() throws Exception {
        super.close();
        if (cluster != null) {
            cluster.close();
            cluster = null;
        }
        session = null;
    }

    /**
     * Builds the cache key from the join values, each followed by "_".
     *
     * @param jsonArray the join values in condition order
     * @return the concatenated key
     */
    public String buildCacheKey(JsonArray jsonArray) {
        StringBuilder sb = new StringBuilder();
        for (Object ele : jsonArray.getList()) {
            sb.append(String.valueOf(ele))
                    .append("_");
        }

        return sb.toString();
    }
}
package com.dtstack.flink.sql.side.cassandra;

import com.dtstack.flink.sql.side.FieldInfo;
import com.dtstack.flink.sql.side.JoinInfo;
import com.dtstack.flink.sql.side.SideInfo;
import com.dtstack.flink.sql.side.SideTableInfo;
import com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo;
import org.apache.calcite.sql.SqlBasicCall;
import org.apache.calcite.sql.SqlIdentifier;
import org.apache.calcite.sql.SqlKind;
import org.apache.calcite.sql.SqlNode;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.calcite.shaded.com.google.common.collect.Lists;

import java.util.List;

/**
 * Join metadata for the asynchronous Cassandra side table: the equi-join columns, the stream
 * field positions that supply their values, and the select statement the lookups start from.
 *
 * Date: 2018/11/22
 *
 * @author xuqianjin
 */
public class CassandraAsyncSideInfo extends SideInfo {

    private static final long serialVersionUID = -4403313049809013362L;

    public CassandraAsyncSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List<FieldInfo> outFieldInfoList, SideTableInfo sideTableInfo) {
        super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo);
    }

    /**
     * Resolves every equal condition of the join and builds the statement
     * {@code select <fields> from <keyspace>.<table>}; the where clause is added per lookup.
     */
    @Override
    public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) {
        CassandraSideTableInfo cassandraSideTableInfo = (CassandraSideTableInfo) sideTableInfo;

        String sideTableName = joinInfo.getSideTableName();

        // The join condition is either a single comparison or an AND of comparisons.
        SqlNode conditionNode = joinInfo.getCondition();

        List<SqlNode> sqlNodeList = Lists.newArrayList();
        if (conditionNode.getKind() == SqlKind.AND) {
            sqlNodeList.addAll(Lists.newArrayList(((SqlBasicCall) conditionNode).getOperands()));
        } else {
            sqlNodeList.add(conditionNode);
        }

        for (SqlNode sqlNode : sqlNodeList) {
            dealOneEqualCon(sqlNode, sideTableName);
        }

        String qualifiedTable = cassandraSideTableInfo.getDatabase() + "." + cassandraSideTableInfo.getTableName();
        sqlCondition = "select ${selectField} from ${tableName}"
                .replace("${tableName}", qualifiedTable)
                .replace("${selectField}", sideSelectFields);
        System.out.println("---------side_exe_sql-----\n" + sqlCondition);
    }


    /**
     * Records one {@code a.x = b.y} condition: the side-table column goes to
     * {@code equalFieldList}, the position of the matching stream field to {@code equalValIndex}.
     *
     * @param sqlNode       one comparison of the join condition
     * @param sideTableName name of the side table as used in the query
     * @throws RuntimeException if the node is not an equality, neither operand belongs to the
     *                          side table, or the stream field cannot be found
     */
    @Override
    public void dealOneEqualCon(SqlNode sqlNode, String sideTableName) {
        if (sqlNode.getKind() != SqlKind.EQUALS) {
            throw new RuntimeException("not equal operator.");
        }

        SqlNode[] operands = ((SqlBasicCall) sqlNode).getOperands();
        SqlIdentifier left = (SqlIdentifier) operands[0];
        SqlIdentifier right = (SqlIdentifier) operands[1];

        String leftTableName = left.getComponent(0).getSimple();
        String leftField = left.getComponent(1).getSimple();

        String rightTableName = right.getComponent(0).getSimple();
        String rightField = right.getComponent(1).getSimple();

        String sideField;
        String streamField;
        if (leftTableName.equalsIgnoreCase(sideTableName)) {
            sideField = leftField;
            streamField = rightField;
        } else if (rightTableName.equalsIgnoreCase(sideTableName)) {
            sideField = rightField;
            streamField = leftField;
        } else {
            throw new RuntimeException("resolve equalFieldList error:" + sqlNode.toString());
        }

        equalFieldList.add(sideField);
        int equalFieldIndex = findStreamFieldIndex(streamField);
        if (equalFieldIndex == -1) {
            throw new RuntimeException("can't deal equal field: " + sqlNode);
        }

        equalValIndex.add(equalFieldIndex);
    }

    /**
     * Finds the position of a stream field by case-insensitive name; when several names match,
     * the last one wins. Returns -1 if none matches.
     */
    private int findStreamFieldIndex(String fieldName) {
        String[] fieldNames = rowTypeInfo.getFieldNames();
        int found = -1;
        for (int i = 0; i < fieldNames.length; i++) {
            if (fieldNames[i].equalsIgnoreCase(fieldName)) {
                found = i;
            }
        }
        return found;
    }

}
com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + jar + + \ No newline at end of file diff --git a/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideParser.java b/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideParser.java new file mode 100644 index 000000000..6403a225b --- /dev/null +++ b/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideParser.java @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.side.cassandra.table; + +import com.dtstack.flink.sql.table.AbsSideTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static com.dtstack.flink.sql.table.TableInfo.PARALLELISM_KEY; + +/** + * Reason: + * Date: 2018/11/22 + * + * @author xuqianjin + */ +public class CassandraSideParser extends AbsSideTableParser { + + private final static String SIDE_SIGN_KEY = "sideSignKey"; + + private final static Pattern SIDE_TABLE_SIGN = Pattern.compile("(?i)^PERIOD\\s+FOR\\s+SYSTEM_TIME$"); + + public static final String ADDRESS_KEY = "address"; + + public static final String TABLE_NAME_KEY = "tableName"; + + public static final String USER_NAME_KEY = "userName"; + + public static final String PASSWORD_KEY = "password"; + + public static final String DATABASE_KEY = "database"; + + public static final String MAX_REQUEST_PER_CONNECTION_KEY = "maxRequestsPerConnection"; + + public static final String CORE_CONNECTIONS_PER_HOST_KEY = "coreConnectionsPerHost"; + + public static final String MAX_CONNECTIONS_PER_HOST_KEY = "maxConnectionsPerHost"; + + public static final String MAX_QUEUE_SIZE_KEY = "maxQueueSize"; + + public static final String READ_TIMEOUT_MILLIS_KEY = "readTimeoutMillis"; + + public static final String CONNECT_TIMEOUT_MILLIS_KEY = "connectTimeoutMillis"; + + public static final String POOL_TIMEOUT_MILLIS_KEY = "poolTimeoutMillis"; + + static { + keyPatternMap.put(SIDE_SIGN_KEY, SIDE_TABLE_SIGN); + keyHandlerMap.put(SIDE_SIGN_KEY, CassandraSideParser::dealSideSign); + } + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo cassandraSideTableInfo = new com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo(); + cassandraSideTableInfo.setName(tableName); + 
parseFieldsInfo(fieldsInfo, cassandraSideTableInfo); + parseCacheProp(cassandraSideTableInfo, props); + + cassandraSideTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(PARALLELISM_KEY.toLowerCase()))); + cassandraSideTableInfo.setAddress(MathUtil.getString(props.get(ADDRESS_KEY.toLowerCase()))); + cassandraSideTableInfo.setTableName(MathUtil.getString(props.get(TABLE_NAME_KEY.toLowerCase()))); + cassandraSideTableInfo.setDatabase(MathUtil.getString(props.get(DATABASE_KEY.toLowerCase()))); + cassandraSideTableInfo.setUserName(MathUtil.getString(props.get(USER_NAME_KEY.toLowerCase()))); + cassandraSideTableInfo.setPassword(MathUtil.getString(props.get(PASSWORD_KEY.toLowerCase()))); + cassandraSideTableInfo.setMaxRequestsPerConnection(MathUtil.getIntegerVal(props.get(MAX_REQUEST_PER_CONNECTION_KEY.toLowerCase()))); + cassandraSideTableInfo.setCoreConnectionsPerHost(MathUtil.getIntegerVal(props.get(CORE_CONNECTIONS_PER_HOST_KEY.toLowerCase()))); + cassandraSideTableInfo.setMaxConnectionsPerHost(MathUtil.getIntegerVal(props.get(MAX_CONNECTIONS_PER_HOST_KEY.toLowerCase()))); + cassandraSideTableInfo.setMaxQueueSize(MathUtil.getIntegerVal(props.get(MAX_QUEUE_SIZE_KEY.toLowerCase()))); + cassandraSideTableInfo.setReadTimeoutMillis(MathUtil.getIntegerVal(props.get(READ_TIMEOUT_MILLIS_KEY.toLowerCase()))); + cassandraSideTableInfo.setConnectTimeoutMillis(MathUtil.getIntegerVal(props.get(CONNECT_TIMEOUT_MILLIS_KEY.toLowerCase()))); + cassandraSideTableInfo.setPoolTimeoutMillis(MathUtil.getIntegerVal(props.get(POOL_TIMEOUT_MILLIS_KEY.toLowerCase()))); + + return cassandraSideTableInfo; + } + + private static void dealSideSign(Matcher matcher, TableInfo tableInfo) { + } +} diff --git a/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java b/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java new file mode 100644 index 
000000000..b1b36f7e8 --- /dev/null +++ b/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.side.cassandra.table; + +import com.dtstack.flink.sql.side.SideTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +/** + * Reason: + * Date: 2018/11/22 + * + * @author xuqianjin + */ +public class CassandraSideTableInfo extends SideTableInfo { + + private static final long serialVersionUID = -5556431094535478915L; + + private static final String CURR_TYPE = "cassandra"; + + public static final String ADDRESS_KEY = "address"; + + public static final String TABLE_NAME_KEY = "tableName"; + + public static final String USER_NAME_KEY = "userName"; + + public static final String PASSWORD_KEY = "password"; + + public static final String DATABASE_KEY = "database"; + + public static final String MAX_REQUEST_PER_CONNECTION_KEY = "maxRequestsPerConnection"; + + public static final String CORE_CONNECTIONS_PER_HOST_KEY = "coreConnectionsPerHost"; + + public static final String MAX_CONNECTIONS_PER_HOST_KEY = 
"maxConnectionsPerHost"; + + public static final String MAX_QUEUE_SIZE_KEY = "maxQueueSize"; + + public static final String READ_TIMEOUT_MILLIS_KEY = "readTimeoutMillis"; + + public static final String CONNECT_TIMEOUT_MILLIS_KEY = "connectTimeoutMillis"; + + public static final String POOL_TIMEOUT_MILLIS_KEY = "poolTimeoutMillis"; + + private String address; + private String tableName; + private String userName; + private String password; + private String database; + private Integer maxRequestsPerConnection; + private Integer coreConnectionsPerHost; + private Integer maxConnectionsPerHost; + private Integer maxQueueSize; + private Integer readTimeoutMillis; + private Integer connectTimeoutMillis; + private Integer poolTimeoutMillis; + + public String getAddress() { + return address; + } + + public void setAddress(String address) { + this.address = address; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getUserName() { + return userName; + } + + public void setUserName(String userName) { + this.userName = userName; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + public Integer getMaxRequestsPerConnection() { + return maxRequestsPerConnection; + } + + public void setMaxRequestsPerConnection(Integer maxRequestsPerConnection) { + this.maxRequestsPerConnection = maxRequestsPerConnection; + } + + public Integer getCoreConnectionsPerHost() { + return coreConnectionsPerHost; + } + + public void setCoreConnectionsPerHost(Integer coreConnectionsPerHost) { + this.coreConnectionsPerHost = coreConnectionsPerHost; + } + + public Integer getMaxConnectionsPerHost() { + return maxConnectionsPerHost; + } + + public void 
setMaxConnectionsPerHost(Integer maxConnectionsPerHost) { + this.maxConnectionsPerHost = maxConnectionsPerHost; + } + + public Integer getMaxQueueSize() { + return maxQueueSize; + } + + public void setMaxQueueSize(Integer maxQueueSize) { + this.maxQueueSize = maxQueueSize; + } + + public Integer getReadTimeoutMillis() { + return readTimeoutMillis; + } + + public void setReadTimeoutMillis(Integer readTimeoutMillis) { + this.readTimeoutMillis = readTimeoutMillis; + } + + public Integer getConnectTimeoutMillis() { + return connectTimeoutMillis; + } + + public void setConnectTimeoutMillis(Integer connectTimeoutMillis) { + this.connectTimeoutMillis = connectTimeoutMillis; + } + + public Integer getPoolTimeoutMillis() { + return poolTimeoutMillis; + } + + public void setPoolTimeoutMillis(Integer poolTimeoutMillis) { + this.poolTimeoutMillis = poolTimeoutMillis; + } + + public CassandraSideTableInfo() { + setType(CURR_TYPE); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(address, "Cassandra field of ADDRESS is required"); + Preconditions.checkNotNull(database, "Cassandra field of database is required"); + Preconditions.checkNotNull(tableName, "Cassandra field of tableName is required"); + return true; + } +} diff --git a/cassandra/cassandra-side/pom.xml b/cassandra/cassandra-side/pom.xml new file mode 100644 index 000000000..92d058900 --- /dev/null +++ b/cassandra/cassandra-side/pom.xml @@ -0,0 +1,23 @@ + + + + sql.cassandra + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.cassandra + cassandra-side + + cassandra-side-core + cassandra-async-side + cassandra-all-side + + + pom + + \ No newline at end of file diff --git a/cassandra/cassandra-sink/pom.xml b/cassandra/cassandra-sink/pom.xml new file mode 100644 index 000000000..4fb20c373 --- /dev/null +++ b/cassandra/cassandra-sink/pom.xml @@ -0,0 +1,82 @@ + + + sql.cassandra + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.cassandra + jar + + 
cassandra-sink + http://maven.apache.org + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + diff --git a/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/CassandraOutputFormat.java b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/CassandraOutputFormat.java new file mode 100644 index 000000000..11fe24b81 --- /dev/null +++ b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/CassandraOutputFormat.java @@ -0,0 +1,379 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.cassandra; + +import com.datastax.driver.core.Cluster; +import com.datastax.driver.core.ConsistencyLevel; +import com.datastax.driver.core.HostDistance; +import com.datastax.driver.core.PoolingOptions; +import com.datastax.driver.core.QueryOptions; +import com.datastax.driver.core.ResultSet; +import com.datastax.driver.core.Session; +import com.datastax.driver.core.SocketOptions; +import com.datastax.driver.core.policies.DowngradingConsistencyRetryPolicy; +import com.datastax.driver.core.policies.RetryPolicy; +import com.dtstack.flink.sql.metric.MetricConstant; +import org.apache.flink.api.common.io.RichOutputFormat; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.Meter; +import org.apache.flink.metrics.MeterView; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.net.InetAddress; +import java.sql.DriverManager; +import java.sql.PreparedStatement; +import java.util.ArrayList; + +/** + * OutputFormat to write tuples into a database. + * The OutputFormat has to be configured using the supplied OutputFormatBuilder. 
+ * + * @see Tuple + * @see DriverManager + */ +public class CassandraOutputFormat extends RichOutputFormat { + private static final long serialVersionUID = -7994311331389155692L; + + private static final Logger LOG = LoggerFactory.getLogger(CassandraOutputFormat.class); + + private String address; + private String tableName; + private String userName; + private String password; + private String database; + private Integer maxRequestsPerConnection; + private Integer coreConnectionsPerHost; + private Integer maxConnectionsPerHost; + private Integer maxQueueSize; + private Integer readTimeoutMillis; + private Integer connectTimeoutMillis; + private Integer poolTimeoutMillis; + + protected String[] fieldNames; + TypeInformation[] fieldTypes; + + private int batchInterval = 5000; + + private Cluster cluster; + private Session session = null; + + private int batchCount = 0; + + private transient Counter outRecords; + + private transient Meter outRecordsRate; + + public CassandraOutputFormat() { + } + + @Override + public void configure(Configuration parameters) { + } + + /** + * Connects to the target database and initializes the prepared statement. + * + * @param taskNumber The number of the parallel instance. + * @throws IOException Thrown, if the output could not be opened due to an + * I/O problem. + */ + @Override + public void open(int taskNumber, int numTasks) throws IOException { + try { + if (session == null) { + QueryOptions queryOptions = new QueryOptions(); + //The default consistency level for queries: ConsistencyLevel.TWO. + queryOptions.setConsistencyLevel(ConsistencyLevel.QUORUM); + Integer maxRequestsPerConnection = this.maxRequestsPerConnection == null ? 1 : this.maxRequestsPerConnection; + Integer coreConnectionsPerHost = this.coreConnectionsPerHost == null ? 8 : this.coreConnectionsPerHost; + Integer maxConnectionsPerHost = this.maxConnectionsPerHost == null ? 32768 : this.maxConnectionsPerHost; + Integer maxQueueSize = this.maxQueueSize == null ? 
100000 : this.maxQueueSize; + Integer readTimeoutMillis = this.readTimeoutMillis == null ? 60000 : this.readTimeoutMillis; + Integer connectTimeoutMillis = this.connectTimeoutMillis == null ? 60000 : this.connectTimeoutMillis; + Integer poolTimeoutMillis = this.poolTimeoutMillis == null ? 60000 : this.poolTimeoutMillis; + Integer cassandraPort = 0; + + ArrayList serversList = new ArrayList(); + //Read timeout or connection timeout Settings + SocketOptions so = new SocketOptions() + .setReadTimeoutMillis(readTimeoutMillis) + .setConnectTimeoutMillis(connectTimeoutMillis); + + //The cluster USES hostdistance.local in the same machine room + //Hostdistance. REMOTE is used for different machine rooms + //Ignore use HostDistance. IGNORED + PoolingOptions poolingOptions = new PoolingOptions() + //Each connection allows a maximum of 64 concurrent requests + .setMaxRequestsPerConnection(HostDistance.LOCAL, maxRequestsPerConnection) + //Have at least two connections to each machine in the cluster + .setCoreConnectionsPerHost(HostDistance.LOCAL, coreConnectionsPerHost) + //There are up to eight connections to each machine in the cluster + .setMaxConnectionsPerHost(HostDistance.LOCAL, maxConnectionsPerHost) + .setMaxQueueSize(maxQueueSize) + .setPoolTimeoutMillis(poolTimeoutMillis); + //重试策略 + RetryPolicy retryPolicy = DowngradingConsistencyRetryPolicy.INSTANCE; + + for (String server : address.split(",")) { + cassandraPort = Integer.parseInt(server.split(":")[1]); + serversList.add(InetAddress.getByName(server.split(":")[0])); + } + + if (userName == null || userName.isEmpty() || password == null || password.isEmpty()) { + cluster = Cluster.builder().addContactPoints(serversList).withRetryPolicy(retryPolicy) + .withPort(cassandraPort) + .withPoolingOptions(poolingOptions).withSocketOptions(so) + .withQueryOptions(queryOptions).build(); + } else { + cluster = Cluster.builder().addContactPoints(serversList).withRetryPolicy(retryPolicy) + .withPort(cassandraPort) + 
.withPoolingOptions(poolingOptions).withSocketOptions(so) + .withCredentials(userName, password) + .withQueryOptions(queryOptions).build(); + } + // 建立连接 连接已存在的键空间 + session = cluster.connect(database); + LOG.info("connect cassandra is successed!"); + initMetric(); + } + } catch (Exception e) { + LOG.error("connect cassandra is error:" + e.getMessage()); + } + } + + private void initMetric() { + outRecords = getRuntimeContext().getMetricGroup().counter(MetricConstant.DT_NUM_RECORDS_OUT); + outRecordsRate = getRuntimeContext().getMetricGroup().meter(MetricConstant.DT_NUM_RECORDS_OUT_RATE, new MeterView(outRecords, 20)); + } + + /** + * Adds a record to the prepared statement. + *

+ * When this method is called, the output format is guaranteed to be opened. + *

+ *

+ * WARNING: this may fail when no column types specified (because a best effort approach is attempted in order to + * insert a null value but it's not guaranteed that the JDBC driver handles PreparedStatement.setObject(pos, null)) + * + * @param tuple2 The records to add to the output. + * @throws IOException Thrown, if the records could not be added due to an I/O problem. + * @see PreparedStatement + */ + @Override + public void writeRecord(Tuple2 tuple2) throws IOException { + Tuple2 tupleTrans = tuple2; + Boolean retract = tupleTrans.getField(0); + Row row = tupleTrans.getField(1); + try { + if (retract) { + insertWrite(row); + outRecords.inc(); + } else { + //do nothing + } + } catch (Exception e) { + throw new IllegalArgumentException("writeRecord() failed", e); + } + } + + private void insertWrite(Row row) { + try { + String cql = buildSql(row); + if (cql != null) { + ResultSet resultSet = session.execute(cql); + resultSet.wasApplied(); + } + } catch (Exception e) { + LOG.error("[upsert] is error:" + e.getMessage()); + } + } + + private String buildSql(Row row) { + StringBuffer fields = new StringBuffer(); + StringBuffer values = new StringBuffer(); + for (int index = 0; index < row.getArity(); index++) { + if (row.getField(index) == null) { + } else { + fields.append(fieldNames[index] + ","); + values.append("'" + row.getField(index) + "'" + ","); + } + } + fields.deleteCharAt(fields.length() - 1); + values.deleteCharAt(values.length() - 1); + String cql = "INSERT INTO " + database + "." + tableName + " (" + fields.toString() + ") " + + " VALUES (" + values.toString() + ")"; + return cql; + } + + /** + * Executes prepared statement and closes all resources of this instance. + * + * @throws IOException Thrown, if the input could not be closed properly. 
+ */ + @Override + public void close() throws IOException { + try { + if (session != null) { + session.close(); + } + } catch (Exception e) { + LOG.error("Error while closing session.", e); + } + try { + if (cluster != null) { + cluster.close(); + } + } catch (Exception e) { + LOG.error("Error while closing cluster.", e); + } + LOG.info("close cassandra is successed!"); + } + + public static CassandraFormatBuilder buildOutputFormat() { + return new CassandraFormatBuilder(); + } + + public static class CassandraFormatBuilder { + private final CassandraOutputFormat format; + + protected CassandraFormatBuilder() { + this.format = new CassandraOutputFormat(); + } + + public CassandraFormatBuilder setUsername(String username) { + format.userName = username; + return this; + } + + public CassandraFormatBuilder setPassword(String password) { + format.password = password; + return this; + } + + public CassandraFormatBuilder setAddress(String address) { + format.address = address; + return this; + } + + public CassandraFormatBuilder setTableName(String tableName) { + format.tableName = tableName; + return this; + } + + public CassandraFormatBuilder setDatabase(String database) { + format.database = database; + return this; + } + + public CassandraFormatBuilder setFieldNames(String[] fieldNames) { + format.fieldNames = fieldNames; + return this; + } + + public CassandraFormatBuilder setFieldTypes(TypeInformation[] fieldTypes) { + format.fieldTypes = fieldTypes; + return this; + } + + public CassandraFormatBuilder setMaxRequestsPerConnection(Integer maxRequestsPerConnection) { + format.maxRequestsPerConnection = maxRequestsPerConnection; + return this; + } + + public CassandraFormatBuilder setCoreConnectionsPerHost(Integer coreConnectionsPerHost) { + format.coreConnectionsPerHost = coreConnectionsPerHost; + return this; + } + + public CassandraFormatBuilder setMaxConnectionsPerHost(Integer maxConnectionsPerHost) { + format.maxConnectionsPerHost = maxConnectionsPerHost; + 
return this; + } + + public CassandraFormatBuilder setMaxQueueSize(Integer maxQueueSize) { + format.maxQueueSize = maxQueueSize; + return this; + } + + public CassandraFormatBuilder setReadTimeoutMillis(Integer readTimeoutMillis) { + format.readTimeoutMillis = readTimeoutMillis; + return this; + } + + public CassandraFormatBuilder setConnectTimeoutMillis(Integer connectTimeoutMillis) { + format.connectTimeoutMillis = connectTimeoutMillis; + return this; + } + + public CassandraFormatBuilder setPoolTimeoutMillis(Integer poolTimeoutMillis) { + format.poolTimeoutMillis = poolTimeoutMillis; + return this; + } + + /** + * Finalizes the configuration and checks validity. + * + * @return Configured RetractJDBCOutputFormat + */ + public CassandraOutputFormat finish() { + if (format.userName == null) { + LOG.info("Username was not supplied separately."); + } + if (format.password == null) { + LOG.info("Password was not supplied separately."); + } + if (format.address == null) { + throw new IllegalArgumentException("No address URL supplied."); + } + if (format.database == null) { + throw new IllegalArgumentException("No dababase suplied"); + } + if (format.tableName == null) { + throw new IllegalArgumentException("No tableName supplied"); + } + return format; + } + } +} diff --git a/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/CassandraSink.java b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/CassandraSink.java new file mode 100644 index 000000000..eb7b23b53 --- /dev/null +++ b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/CassandraSink.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.sink.cassandra; + + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.cassandra.table.CassandraTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TupleTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.functions.sink.OutputFormatSinkFunction; +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.table.sinks.RetractStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +/** + * Reason: + * Date: 2018/11/22 + * + * @author xuqianjin + */ +public class CassandraSink implements RetractStreamTableSink, IStreamSinkGener { + + + protected String[] fieldNames; + TypeInformation[] fieldTypes; + protected String address; + protected String tableName; + protected String userName; + protected String password; + protected String database; + protected Integer maxRequestsPerConnection; + protected Integer coreConnectionsPerHost; + protected Integer maxConnectionsPerHost; + protected Integer maxQueueSize; + protected Integer readTimeoutMillis; + protected 
Integer connectTimeoutMillis; + protected Integer poolTimeoutMillis; + + public CassandraSink() { + // TO DO NOTHING + } + + @Override + public CassandraSink genStreamSink(TargetTableInfo targetTableInfo) { + CassandraTableInfo cassandraTableInfo = (CassandraTableInfo) targetTableInfo; + this.address = cassandraTableInfo.getAddress(); + this.tableName = cassandraTableInfo.getTableName(); + this.userName = cassandraTableInfo.getUserName(); + this.password = cassandraTableInfo.getPassword(); + this.database = cassandraTableInfo.getDatabase(); + this.maxRequestsPerConnection = cassandraTableInfo.getMaxRequestsPerConnection(); + this.coreConnectionsPerHost = cassandraTableInfo.getCoreConnectionsPerHost(); + this.maxConnectionsPerHost = cassandraTableInfo.getMaxConnectionsPerHost(); + this.maxQueueSize = cassandraTableInfo.getMaxQueueSize(); + this.readTimeoutMillis = cassandraTableInfo.getReadTimeoutMillis(); + this.connectTimeoutMillis = cassandraTableInfo.getConnectTimeoutMillis(); + this.poolTimeoutMillis = cassandraTableInfo.getPoolTimeoutMillis(); + return this; + } + + @Override + public void emitDataStream(DataStream> dataStream) { + CassandraOutputFormat.CassandraFormatBuilder builder = CassandraOutputFormat.buildOutputFormat(); + builder.setAddress(this.address) + .setDatabase(this.database) + .setTableName(this.tableName) + .setPassword(this.password) + .setUsername(this.userName) + .setMaxRequestsPerConnection(this.maxRequestsPerConnection) + .setCoreConnectionsPerHost(this.coreConnectionsPerHost) + .setMaxConnectionsPerHost(this.maxConnectionsPerHost) + .setMaxQueueSize(this.maxQueueSize) + .setReadTimeoutMillis(this.readTimeoutMillis) + .setConnectTimeoutMillis(this.connectTimeoutMillis) + .setPoolTimeoutMillis(this.poolTimeoutMillis) + .setFieldNames(this.fieldNames) + .setFieldTypes(this.fieldTypes); + + CassandraOutputFormat outputFormat = builder.finish(); + RichSinkFunction richSinkFunction = new OutputFormatSinkFunction(outputFormat); + 
dataStream.addSink(richSinkFunction); + } + + @Override + public TableSink> configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + + @Override + public TupleTypeInfo> getOutputType() { + return new TupleTypeInfo(org.apache.flink.table.api.Types.BOOLEAN(), getRecordType()); + } + + @Override + public TypeInformation getRecordType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + +} diff --git a/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraSinkParser.java b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraSinkParser.java new file mode 100644 index 000000000..4c68e71ae --- /dev/null +++ b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraSinkParser.java @@ -0,0 +1,84 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.sink.cassandra.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +import static com.dtstack.flink.sql.table.TableInfo.PARALLELISM_KEY; + +/** + * Reason: + * Date: 2018/11/22 + * + * @author xuqianjin + */ +public class CassandraSinkParser extends AbsTableParser { + + public static final String ADDRESS_KEY = "address"; + + public static final String TABLE_NAME_KEY = "tableName"; + + public static final String USER_NAME_KEY = "userName"; + + public static final String PASSWORD_KEY = "password"; + + public static final String DATABASE_KEY = "database"; + + public static final String MAX_REQUEST_PER_CONNECTION_KEY = "maxRequestsPerConnection"; + + public static final String CORE_CONNECTIONS_PER_HOST_KEY = "coreConnectionsPerHost"; + + public static final String MAX_CONNECTIONS_PER_HOST_KEY = "maxConnectionsPerHost"; + + public static final String MAX_QUEUE_SIZE_KEY = "maxQueueSize"; + + public static final String READ_TIMEOUT_MILLIS_KEY = "readTimeoutMillis"; + + public static final String CONNECT_TIMEOUT_MILLIS_KEY = "connectTimeoutMillis"; + + public static final String POOL_TIMEOUT_MILLIS_KEY = "poolTimeoutMillis"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + CassandraTableInfo cassandraTableInfo = new CassandraTableInfo(); + cassandraTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, cassandraTableInfo); + + cassandraTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(PARALLELISM_KEY.toLowerCase()))); + cassandraTableInfo.setAddress(MathUtil.getString(props.get(ADDRESS_KEY.toLowerCase()))); + cassandraTableInfo.setTableName(MathUtil.getString(props.get(TABLE_NAME_KEY.toLowerCase()))); + cassandraTableInfo.setDatabase(MathUtil.getString(props.get(DATABASE_KEY.toLowerCase()))); + 
cassandraTableInfo.setUserName(MathUtil.getString(props.get(USER_NAME_KEY.toLowerCase()))); + cassandraTableInfo.setPassword(MathUtil.getString(props.get(PASSWORD_KEY.toLowerCase()))); + cassandraTableInfo.setMaxRequestsPerConnection(MathUtil.getIntegerVal(props.get(MAX_REQUEST_PER_CONNECTION_KEY.toLowerCase()))); + cassandraTableInfo.setCoreConnectionsPerHost(MathUtil.getIntegerVal(props.get(CORE_CONNECTIONS_PER_HOST_KEY.toLowerCase()))); + cassandraTableInfo.setMaxConnectionsPerHost(MathUtil.getIntegerVal(props.get(MAX_CONNECTIONS_PER_HOST_KEY.toLowerCase()))); + cassandraTableInfo.setMaxQueueSize(MathUtil.getIntegerVal(props.get(MAX_QUEUE_SIZE_KEY.toLowerCase()))); + cassandraTableInfo.setReadTimeoutMillis(MathUtil.getIntegerVal(props.get(READ_TIMEOUT_MILLIS_KEY.toLowerCase()))); + cassandraTableInfo.setConnectTimeoutMillis(MathUtil.getIntegerVal(props.get(CONNECT_TIMEOUT_MILLIS_KEY.toLowerCase()))); + cassandraTableInfo.setPoolTimeoutMillis(MathUtil.getIntegerVal(props.get(POOL_TIMEOUT_MILLIS_KEY.toLowerCase()))); + + return cassandraTableInfo; + } +} diff --git a/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java new file mode 100644 index 000000000..7d52b23bb --- /dev/null +++ b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.sink.cassandra.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +/** + * Reason: + * Date: 2018/11/22 + * + * @author xuqianjin + */ +public class CassandraTableInfo extends TargetTableInfo { + + private static final String CURR_TYPE = "cassandra"; + + private String address; + private String tableName; + private String userName; + private String password; + private String database; + private Integer maxRequestsPerConnection; + private Integer coreConnectionsPerHost; + private Integer maxConnectionsPerHost; + private Integer maxQueueSize; + private Integer readTimeoutMillis; + private Integer connectTimeoutMillis; + private Integer poolTimeoutMillis; + + public CassandraTableInfo() { + setType(CURR_TYPE); + } + + public String getAddress() { + return address; + } + + public void setAddress(String address) { + this.address = address; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public String getUserName() { + return userName; + } + + public void setUserName(String userName) { + this.userName = userName; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + public Integer 
getMaxRequestsPerConnection() { + return maxRequestsPerConnection; + } + + public void setMaxRequestsPerConnection(Integer maxRequestsPerConnection) { + this.maxRequestsPerConnection = maxRequestsPerConnection; + } + + public Integer getCoreConnectionsPerHost() { + return coreConnectionsPerHost; + } + + public void setCoreConnectionsPerHost(Integer coreConnectionsPerHost) { + this.coreConnectionsPerHost = coreConnectionsPerHost; + } + + public Integer getMaxConnectionsPerHost() { + return maxConnectionsPerHost; + } + + public void setMaxConnectionsPerHost(Integer maxConnectionsPerHost) { + this.maxConnectionsPerHost = maxConnectionsPerHost; + } + + public Integer getMaxQueueSize() { + return maxQueueSize; + } + + public void setMaxQueueSize(Integer maxQueueSize) { + this.maxQueueSize = maxQueueSize; + } + + public Integer getReadTimeoutMillis() { + return readTimeoutMillis; + } + + public void setReadTimeoutMillis(Integer readTimeoutMillis) { + this.readTimeoutMillis = readTimeoutMillis; + } + + public Integer getConnectTimeoutMillis() { + return connectTimeoutMillis; + } + + public void setConnectTimeoutMillis(Integer connectTimeoutMillis) { + this.connectTimeoutMillis = connectTimeoutMillis; + } + + public Integer getPoolTimeoutMillis() { + return poolTimeoutMillis; + } + + public void setPoolTimeoutMillis(Integer poolTimeoutMillis) { + this.poolTimeoutMillis = poolTimeoutMillis; + } + + @Override + public boolean check() { + Preconditions.checkNotNull(address, "Cassandra field of ADDRESS is required"); + Preconditions.checkNotNull(database, "Cassandra field of database is required"); + Preconditions.checkNotNull(tableName, "Cassandra field of tableName is required"); + return true; + } + + @Override + public String getType() { + // return super.getType().toLowerCase() + TARGET_SUFFIX; + return super.getType().toLowerCase(); + } +} diff --git a/cassandra/cassandra-sink/src/test/java/com/dtstack/flinkx/AppTest.java 
b/cassandra/cassandra-sink/src/test/java/com/dtstack/flinkx/AppTest.java new file mode 100644 index 000000000..33a0233ac --- /dev/null +++ b/cassandra/cassandra-sink/src/test/java/com/dtstack/flinkx/AppTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flinkx; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Unit test for simple App. 
+ */ +public class AppTest + extends TestCase +{ + /** + * Create the test case + * + * @param testName name of the test case + */ + public AppTest( String testName ) + { + super( testName ); + } + + /** + * @return the suite of tests being tested + */ + public static Test suite() + { + return new TestSuite( AppTest.class ); + } + + /** + * Rigourous Test :-) + */ + public void testApp() + { + assertTrue( true ); + } +} diff --git a/cassandra/pom.xml b/cassandra/pom.xml new file mode 100644 index 000000000..f49de388b --- /dev/null +++ b/cassandra/pom.xml @@ -0,0 +1,39 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + sql.cassandra + pom + + + cassandra-sink + cassandra-side + + + + + junit + junit + 3.8.1 + test + + + com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + com.datastax.cassandra + cassandra-driver-core + 3.6.0 + + + + \ No newline at end of file diff --git a/console/console-sink/pom.xml b/console/console-sink/pom.xml new file mode 100644 index 000000000..a5ca2629f --- /dev/null +++ b/console/console-sink/pom.xml @@ -0,0 +1,79 @@ + + + + sql.console + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + console-sink + jar + + console-sink + http://maven.apache.org + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/ConsoleOutputFormat.java b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/ConsoleOutputFormat.java new file mode 100644 index 000000000..7658e9979 --- /dev/null +++ b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/ConsoleOutputFormat.java @@ -0,0 +1,117 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.console; + +import com.dtstack.flink.sql.sink.MetricOutputFormat; +import com.dtstack.flink.sql.sink.console.table.TablePrintUtil; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +/** + * Reason: + * Date: 2018/12/19 + * + * @author xuqianjin + */ +public class ConsoleOutputFormat extends MetricOutputFormat { + + private static final Logger LOG = LoggerFactory.getLogger(ConsoleOutputFormat.class); + + protected String[] fieldNames; + TypeInformation[] fieldTypes; + + @Override + public void configure(Configuration parameters) { + + } + + @Override + public void open(int taskNumber, int numTasks) throws IOException { + initMetric(); + } + + @Override + public void writeRecord(Tuple2 tuple2) throws IOException { + Tuple2 tupleTrans = tuple2; + Boolean retract = tupleTrans.getField(0); + if (!retract) { + return; + } + + Row record = tupleTrans.getField(1); + if (record.getArity() != fieldNames.length) { + return; + } + + List data = new ArrayList<>(); + data.add(fieldNames); + data.add(record.toString().split(",")); + TablePrintUtil.build(data).print(); + + outRecords.inc(); + } + + @Override + public void close() throws IOException { + + } + + private ConsoleOutputFormat() { + } + + public static ConsoleOutputFormatBuilder buildOutputFormat() { + return new ConsoleOutputFormatBuilder(); + } + + public static 
class ConsoleOutputFormatBuilder { + + private final ConsoleOutputFormat format; + + protected ConsoleOutputFormatBuilder() { + this.format = new ConsoleOutputFormat(); + } + + public ConsoleOutputFormatBuilder setFieldNames(String[] fieldNames) { + format.fieldNames = fieldNames; + return this; + } + + public ConsoleOutputFormatBuilder setFieldTypes(TypeInformation[] fieldTypes) { + format.fieldTypes = fieldTypes; + return this; + } + + /** + * Finalizes the configuration and checks validity. + * + * @return Configured RetractConsoleCOutputFormat + */ + public ConsoleOutputFormat finish() { + return format; + } + } +} diff --git a/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/ConsoleSink.java b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/ConsoleSink.java new file mode 100644 index 000000000..77a3efea2 --- /dev/null +++ b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/ConsoleSink.java @@ -0,0 +1,86 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.console; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TupleTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.functions.sink.OutputFormatSinkFunction; +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.table.sinks.RetractStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +/** + * Reason: + * Date: 2018/12/19 + * + * @author xuqianjin + */ +public class ConsoleSink implements RetractStreamTableSink, IStreamSinkGener { + + protected String[] fieldNames; + TypeInformation[] fieldTypes; + + @Override + public TableSink> configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + + @Override + public TupleTypeInfo> getOutputType() { + return new TupleTypeInfo(org.apache.flink.table.api.Types.BOOLEAN(), getRecordType()); + } + + @Override + public TypeInformation getRecordType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + @Override + public void emitDataStream(DataStream> dataStream) { + 
ConsoleOutputFormat.ConsoleOutputFormatBuilder builder = ConsoleOutputFormat.buildOutputFormat(); + builder.setFieldNames(this.fieldNames) + .setFieldTypes(this.fieldTypes); + ConsoleOutputFormat outputFormat = builder.finish(); + RichSinkFunction richSinkFunction = new OutputFormatSinkFunction(outputFormat); + dataStream.addSink(richSinkFunction); + } + + @Override + public ConsoleSink genStreamSink(TargetTableInfo targetTableInfo) { + return this; + } +} diff --git a/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/ConsoleSinkParser.java b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/ConsoleSinkParser.java new file mode 100644 index 000000000..e77444bfd --- /dev/null +++ b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/ConsoleSinkParser.java @@ -0,0 +1,45 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.console.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +import static com.dtstack.flink.sql.table.TableInfo.PARALLELISM_KEY; + +/** + * Reason: + * Date: 2018/12/19 + * + * @author xuqianjin + */ +public class ConsoleSinkParser extends AbsTableParser { + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + ConsoleTableInfo consoleTableInfo = new ConsoleTableInfo(); + consoleTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, consoleTableInfo); + + consoleTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(PARALLELISM_KEY.toLowerCase()))); + return consoleTableInfo; + } +} diff --git a/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/ConsoleTableInfo.java b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/ConsoleTableInfo.java new file mode 100644 index 000000000..4b286c667 --- /dev/null +++ b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/ConsoleTableInfo.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.console.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; + +/** + * Reason: + * Date: 2018/12/19 + * + * @author xuqianjin + */ +public class ConsoleTableInfo extends TargetTableInfo { + + private static final String CURR_TYPE = "console"; + + public ConsoleTableInfo() { + setType(CURR_TYPE); + } + + @Override + public boolean check() { + return true; + } + + @Override + public String getType() { + return super.getType().toLowerCase(); + } +} diff --git a/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/TablePrintUtil.java b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/TablePrintUtil.java new file mode 100644 index 000000000..8813da619 --- /dev/null +++ b/console/console-sink/src/main/java/com/dtstack/flink/sql/sink/console/table/TablePrintUtil.java @@ -0,0 +1,341 @@ +package com.dtstack.flink.sql.sink.console.table; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * Reason: + * Date: 2018/12/19 + * + * @author xuqianjin + */ +public class TablePrintUtil { + private static final Logger LOG = LoggerFactory.getLogger(TablePrintUtil.class); + public static final int ALIGN_LEFT = 1;//左对齐 + public static final int ALIGN_RIGHT = 2;//右对齐 + public static final int ALIGN_CENTER = 3;//居中对齐 + + private int align = ALIGN_CENTER;//默认居中对齐 + private boolean equilong = false;//默认不等宽 + private int 
padding = 1;//左右边距默认为1 + private char h = '-';//默认水平分隔符 + private char v = '|';//默认竖直分隔符 + private char o = '+';//默认交叉分隔符 + private char s = ' ';//默认空白填充符 + private List data;//数据 + + private TablePrintUtil() { + } + + /** + * 链式调用入口方法 + * + * @param data + * @return + */ + public static TablePrintUtil build(String[][] data) { + TablePrintUtil self = new TablePrintUtil(); + self.data = new ArrayList<>(Arrays.asList(data)); + return self; + } + + /** + * 链式调用入口方法,T可以是String[]、List、任意实体类 + * 由于java泛型不同无法重载,所以这里要写if instanceof进行类型判断 + * + * @param data + * @param + * @return + */ + public static TablePrintUtil build(List data) { + TablePrintUtil self = new TablePrintUtil(); + self.data = new ArrayList<>(); + if (data.size() <= 0) throw new RuntimeException("数据源至少得有一行吧"); + Object obj = data.get(0); + + + if (obj instanceof String[]) { + //如果泛型为String数组,则直接设置 + self.data = (List) data; + } else if (obj instanceof List) { + //如果泛型为List,则把list中的item依次转为String[],再设置 + int length = ((List) obj).size(); + for (Object item : data) { + List col = (List) item; + if (col.size() != length) throw new RuntimeException("数据源每列长度必须一致"); + self.data.add(col.toArray(new String[length])); + } + } else { + //如果泛型为实体类,则利用反射获取get方法列表,从而推算出属性列表。 + //根据反射得来的属性列表设置表格第一行thead + List colList = getColList(obj); + String[] header = new String[colList.size()]; + for (int i = 0; i < colList.size(); i++) { + header[i] = colList.get(i).colName; + } + self.data.add(header); + //利用反射调用相应get方法获取属性值来设置表格tbody + for (int i = 0; i < data.size(); i++) { + String[] item = new String[colList.size()]; + for (int j = 0; j < colList.size(); j++) { + String value = null; + try { + value = obj.getClass().getMethod(colList.get(j).getMethodName).invoke(data.get(i)).toString(); + } catch (IllegalAccessException | InvocationTargetException | NoSuchMethodException e) { + e.printStackTrace(); + } + item[j] = value == null ? 
"null" : value; + } + self.data.add(item); + } + } + return self; + } + + private static class Col { + private String colName;//列名 + private String getMethodName;//get方法名 + } + + /** + * 利用反射获取get方法名和属性名 + * + * @return + */ + private static List getColList(Object obj) { + List colList = new ArrayList<>(); + Method[] methods = obj.getClass().getMethods(); + for (Method m : methods) { + StringBuilder getMethodName = new StringBuilder(m.getName()); + if (getMethodName.substring(0, 3).equals("get") && !m.getName().equals("getClass")) { + Col col = new Col(); + col.getMethodName = getMethodName.toString(); + char first = Character.toLowerCase(getMethodName.delete(0, 3).charAt(0)); + getMethodName.delete(0, 1).insert(0, first); + col.colName = getMethodName.toString(); + colList.add(col); + } + } + return colList; + } + + /** + * 获取字符串占的字符位数 + * + * @param str + * @return + */ + private int getStringCharLength(String str) { + Pattern p = Pattern.compile("[\u4e00-\u9fa5]");//利用正则找到中文 + Matcher m = p.matcher(str); + int count = 0; + while (m.find()) { + count++; + } + return str.length() + count; + } + + /** + * 纵向遍历获取数据每列的长度 + * + * @return + */ + private int[] getColLengths() { + int[] result = new int[data.get(0).length]; + for (int x = 0; x < result.length; x++) { + int max = 0; + for (int y = 0; y < data.size(); y++) { + int len = getStringCharLength(data.get(y)[x]); + if (len > max) { + max = len; + } + } + result[x] = max; + } + if (equilong) {//如果等宽表格 + int max = 0; + for (int len : result) { + if (len > max) max = len; + } + for (int i = 0; i < result.length; i++) { + result[i] = max; + } + } + return result; + } + + /** + * 取得表格字符串 + * + * @return + */ + public String getTableString() { + StringBuilder sb = new StringBuilder(); + int[] colLengths = getColLengths();//获取每列文字宽度 + StringBuilder line = new StringBuilder();//表格横向分隔线 + line.append(o); + for (int len : colLengths) { + int allLen = len + padding * 2;//还需要加上边距和分隔符的长度 + for (int i = 0; i < allLen; i++) { + 
line.append(h); + } + line.append(o); + } + sb.append(line).append("\r\n"); + for (int y = 0; y < data.size(); y++) { + sb.append(v); + for (int x = 0; x < data.get(y).length; x++) { + String cell = data.get(y)[x]; + switch (align) { + case ALIGN_LEFT: + for (int i = 0; i < padding; i++) {sb.append(s);} + sb.append(cell); + for (int i = 0; i < colLengths[x] - getStringCharLength(cell) + padding; i++) {sb.append(s);} + break; + case ALIGN_RIGHT: + for (int i = 0; i < colLengths[x] - getStringCharLength(cell) + padding; i++) {sb.append(s);} + sb.append(cell); + for (int i = 0; i < padding; i++) {sb.append(s);} + break; + case ALIGN_CENTER: + int space = colLengths[x] - getStringCharLength(cell); + int left = space / 2; + int right = space - left; + for (int i = 0; i < left + padding; i++) {sb.append(s);} + sb.append(cell); + for (int i = 0; i < right + padding; i++) {sb.append(s);} + break; + } + sb.append(v); + } + sb.append("\r\n"); + sb.append(line).append("\r\n"); + } + return sb.toString(); + } + + /** + * 直接打印表格 + */ + public void print() { + LOG.info("\n"+getTableString()); + System.out.println(getTableString()); + } + + //下面是链式调用的set方法 + public TablePrintUtil setAlign(int align) { + this.align = align; + return this; + } + + public TablePrintUtil setEquilong(boolean equilong) { + this.equilong = equilong; + return this; + } + + public TablePrintUtil setPadding(int padding) { + this.padding = padding; + return this; + } + + public TablePrintUtil setH(char h) { + this.h = h; + return this; + } + + public TablePrintUtil setV(char v) { + this.v = v; + return this; + } + + public TablePrintUtil setO(char o) { + this.o = o; + return this; + } + + public TablePrintUtil setS(char s) { + this.s = s; + return this; + } + + /** + * 使用示例 + * + * @param args + */ + public static void main(String[] args) { + List data1 = new ArrayList<>(); + data1.add(new String[]{"用户名", "密码", "姓名"}); + data1.add(new String[]{"xiaoming", "xm123", "小明"}); + data1.add(new 
String[]{"xiaohong", "xh123", "小红"}); + TablePrintUtil.build(data1).print(); + + List> data2 = new ArrayList<>(); + data2.add(new ArrayList<>()); + data2.add(new ArrayList<>()); + data2.add(new ArrayList<>()); + data2.get(0).add("用户名"); + data2.get(0).add("密码"); + data2.get(0).add("姓名"); + data2.get(1).add("xiaoming"); + data2.get(1).add("xm123"); + data2.get(1).add("小明"); + data2.get(2).add("xiaohong"); + data2.get(2).add("xh123"); + data2.get(2).add("小红"); + TablePrintUtil.build(data2) + .setAlign(TablePrintUtil.ALIGN_LEFT) + .setPadding(5) + .setEquilong(true) + .print(); + + + class User { + String username; + String password; + String name; + + User(String username, String password, String name) { + this.username = username; + this.password = password; + this.name = name; + } + + public String getUsername() { + return username; + } + + public void setUsername(String username) { + this.username = username; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + } + List data3 = new ArrayList<>(); + data3.add(new User("xiaoming", "xm123", "小明")); + data3.add(new User("xiaohong", "xh123", "小红")); + TablePrintUtil.build(data3).setH('=').setV('!').print(); + } +} diff --git a/console/console-sink/src/test/java/com/dtstack/flinkx/AppTest.java b/console/console-sink/src/test/java/com/dtstack/flinkx/AppTest.java new file mode 100644 index 000000000..e03e5451f --- /dev/null +++ b/console/console-sink/src/test/java/com/dtstack/flinkx/AppTest.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flinkx; + +import junit.framework.Test; +import junit.framework.TestCase; +import junit.framework.TestSuite; + +/** + * Unit test for simple App. + */ +public class AppTest + extends TestCase +{ + /** + * Create the test case + * + * @param testName name of the test case + */ + public AppTest(String testName ) + { + super( testName ); + } + + /** + * @return the suite of tests being tested + */ + public static Test suite() + { + return new TestSuite( AppTest.class ); + } + + /** + * Rigourous Test :-) + */ + public void testApp() + { + assertTrue( true ); + } +} diff --git a/console/pom.xml b/console/pom.xml new file mode 100644 index 000000000..983e1c185 --- /dev/null +++ b/console/pom.xml @@ -0,0 +1,34 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + sql.console + pom + + + console-sink + + + + + junit + junit + 3.8.1 + test + + + com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + + + \ No newline at end of file diff --git a/core/pom.xml b/core/pom.xml index 1040fcea6..a38d818ae 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -28,9 +28,9 @@ - org.apache.flink - flink-table_2.11 - ${flink.version} + joda-time + joda-time + 2.5 @@ -45,6 +45,25 @@ ${flink.version} + + org.apache.flink + flink-table_2.11 + ${flink.version} + + + + org.apache.calcite + calcite-server + + 1.16.0 + + + + org.apache.flink + flink-cep-scala_2.11 + 
${flink.version} + + org.apache.flink flink-scala_2.11 @@ -56,6 +75,18 @@ flink-streaming-scala_2.11 ${flink.version} + + + org.apache.flink + flink-shaded-hadoop2 + ${flink.version} + + + + org.apache.flink + flink-yarn_2.11 + ${flink.version} + diff --git a/core/src/main/java/com/dtstack/flink/sql/ClusterMode.java b/core/src/main/java/com/dtstack/flink/sql/ClusterMode.java index 024a31854..82105d9e7 100644 --- a/core/src/main/java/com/dtstack/flink/sql/ClusterMode.java +++ b/core/src/main/java/com/dtstack/flink/sql/ClusterMode.java @@ -31,4 +31,8 @@ public enum ClusterMode { ClusterMode(int type){ this.type = type; } + + public int getType(){ + return this.type; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/Main.java b/core/src/main/java/com/dtstack/flink/sql/Main.java index 610abf21f..aa05528ce 100644 --- a/core/src/main/java/com/dtstack/flink/sql/Main.java +++ b/core/src/main/java/com/dtstack/flink/sql/Main.java @@ -16,12 +16,15 @@ * limitations under the License. */ - + package com.dtstack.flink.sql; import com.dtstack.flink.sql.classloader.DtClassLoader; import com.dtstack.flink.sql.enums.ECacheType; +import com.dtstack.flink.sql.environment.MyLocalStreamEnvironment; +import com.dtstack.flink.sql.options.LauncherOptionParser; +import com.dtstack.flink.sql.options.LauncherOptions; import com.dtstack.flink.sql.parser.*; import com.dtstack.flink.sql.side.SideSqlExec; import com.dtstack.flink.sql.side.SideTableInfo; @@ -34,6 +37,8 @@ import com.dtstack.flink.sql.watermarker.WaterMarkerAssigner; import com.dtstack.flink.sql.util.FlinkUtil; import com.dtstack.flink.sql.util.PluginUtil; +import com.google.common.base.Preconditions; +import org.apache.calcite.config.Lex; import org.apache.calcite.sql.SqlInsert; import org.apache.calcite.sql.SqlNode; import org.apache.commons.cli.CommandLine; @@ -41,16 +46,18 @@ import org.apache.commons.cli.DefaultParser; import org.apache.commons.cli.Options; import org.apache.commons.io.Charsets; +import 
org.apache.commons.lang3.StringUtils; +import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.api.common.time.Time; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; import org.apache.flink.calcite.shaded.com.google.common.base.Strings; import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; import org.apache.flink.calcite.shaded.com.google.common.collect.Sets; import org.apache.flink.client.program.ContextEnvironment; +import org.apache.flink.configuration.Configuration; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamContextEnvironment; @@ -58,7 +65,6 @@ import org.apache.flink.table.api.Table; import org.apache.flink.table.api.java.StreamTableEnvironment; import org.apache.flink.table.sinks.TableSink; -import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -66,6 +72,7 @@ import java.io.IOException; import java.lang.reflect.Field; import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.net.URL; import java.net.URLClassLoader; import java.net.URLDecoder; @@ -97,6 +104,11 @@ public class Main { private static final int delayInterval = 10; //sec + private static org.apache.calcite.sql.parser.SqlParser.Config config = org.apache.calcite.sql.parser.SqlParser + .configBuilder() + .setLex(Lex.MYSQL) + .build(); + public static void main(String[] args) throws Exception { Options options = new Options(); @@ -127,8 +139,8 @@ public static void main(String[] args) throws Exception { sql = URLDecoder.decode(sql, 
Charsets.UTF_8.name()); SqlParser.setLocalSqlPluginRoot(localSqlPluginPath); - List addJarFileList = Lists.newArrayList(); + if(!Strings.isNullOrEmpty(addJarListStr)){ addJarListStr = URLDecoder.decode(addJarListStr, Charsets.UTF_8.name()); addJarFileList = objMapper.readValue(addJarListStr, List.class); @@ -185,7 +197,7 @@ public static void main(String[] args) throws Exception { if (sqlTree.getTmpTableMap().containsKey(tableName)) { CreateTmpTableParser.SqlParserResult tmp = sqlTree.getTmpTableMap().get(tableName); String realSql = DtStringUtil.replaceIgnoreQuota(result.getExecSql(), "`", ""); - SqlNode sqlNode = org.apache.calcite.sql.parser.SqlParser.create(realSql).parseStmt(); + SqlNode sqlNode = org.apache.calcite.sql.parser.SqlParser.create(realSql,config).parseStmt(); String tmpSql = ((SqlInsert) sqlNode).getSource().toString(); tmp.setExecSql(tmpSql); sideSqlExec.registerTmpTable(tmp, sideTableMap, tableEnv, registerTableCache); @@ -232,6 +244,12 @@ private static void addEnvClassPath(StreamExecutionEnvironment env, Set cla contextEnvironment.getClasspaths().add(url); } } + int i = 0; + for(URL url : classPathSet){ + String classFileName = String.format(CLASS_FILE_NAME_FMT, i); + env.registerCachedFile(url.getPath(), classFileName, true); + i++; + } } private static void registerUDF(SqlTree sqlTree, List jarURList, URLClassLoader parentClassloader, @@ -246,7 +264,7 @@ private static void registerUDF(SqlTree sqlTree, List jarURList, URLClassLo classLoader = FlinkUtil.loadExtraJar(jarURList, parentClassloader); } classLoader.loadClass(funcInfo.getClassName()); - FlinkUtil.registerUDF(funcInfo.getType(), funcInfo.getClassName(), funcInfo.getName().toUpperCase(), + FlinkUtil.registerUDF(funcInfo.getType(), funcInfo.getClassName(), funcInfo.getName(), tableEnv, classLoader); } } @@ -283,18 +301,23 @@ private static void registerTable(SqlTree sqlTree, StreamExecutionEnvironment en Table regTable = tableEnv.fromDataStream(adaptStream, fields); 
tableEnv.registerTable(tableInfo.getName(), regTable); registerTableCache.put(tableInfo.getName(), regTable); - classPathSet.add(PluginUtil.getRemoteJarFilePath(tableInfo.getType(), SourceTableInfo.SOURCE_SUFFIX, remoteSqlPluginPath)); + if(StringUtils.isNotBlank(remoteSqlPluginPath)){ + classPathSet.add(PluginUtil.getRemoteJarFilePath(tableInfo.getType(), SourceTableInfo.SOURCE_SUFFIX, remoteSqlPluginPath)); + } } else if (tableInfo instanceof TargetTableInfo) { TableSink tableSink = StreamSinkFactory.getTableSink((TargetTableInfo) tableInfo, localSqlPluginPath); TypeInformation[] flinkTypes = FlinkUtil.transformTypes(tableInfo.getFieldClasses()); tableEnv.registerTableSink(tableInfo.getName(), tableInfo.getFields(), flinkTypes, tableSink); - classPathSet.add( PluginUtil.getRemoteJarFilePath(tableInfo.getType(), TargetTableInfo.TARGET_SUFFIX, remoteSqlPluginPath)); + if(StringUtils.isNotBlank(remoteSqlPluginPath)){ + classPathSet.add( PluginUtil.getRemoteJarFilePath(tableInfo.getType(), TargetTableInfo.TARGET_SUFFIX, remoteSqlPluginPath)); + } } else if(tableInfo instanceof SideTableInfo){ - String sideOperator = ECacheType.ALL.name().equals(((SideTableInfo) tableInfo).getCacheType()) ? 
"all" : "async"; sideTableMap.put(tableInfo.getName(), (SideTableInfo) tableInfo); - classPathSet.add(PluginUtil.getRemoteSideJarFilePath(tableInfo.getType(), sideOperator, SideTableInfo.TARGET_SUFFIX, remoteSqlPluginPath)); + if(StringUtils.isNotBlank(remoteSqlPluginPath)){ + classPathSet.add(PluginUtil.getRemoteSideJarFilePath(tableInfo.getType(), sideOperator, SideTableInfo.TARGET_SUFFIX, remoteSqlPluginPath)); + } }else { throw new RuntimeException("not support table type:" + tableInfo.getType()); } @@ -302,20 +325,35 @@ private static void registerTable(SqlTree sqlTree, StreamExecutionEnvironment en //The plug-in information corresponding to the table is loaded into the classPath env addEnvClassPath(env, classPathSet); - int i = 0; - for(URL url : classPathSet){ - String classFileName = String.format(CLASS_FILE_NAME_FMT, i); - env.registerCachedFile(url.getPath(), classFileName, true); - i++; - } } - private static StreamExecutionEnvironment getStreamExeEnv(Properties confProperties, String deployMode) throws IOException { + private static StreamExecutionEnvironment getStreamExeEnv(Properties confProperties, String deployMode) throws IOException, NoSuchMethodException { StreamExecutionEnvironment env = !ClusterMode.local.name().equals(deployMode) ? 
StreamExecutionEnvironment.getExecutionEnvironment() : new MyLocalStreamEnvironment(); env.setParallelism(FlinkUtil.getEnvParallelism(confProperties)); + Configuration globalJobParameters = new Configuration(); + Method method = Configuration.class.getDeclaredMethod("setValueInternal", String.class, Object.class); + method.setAccessible(true); + + confProperties.forEach((key,val) -> { + try { + method.invoke(globalJobParameters, key, val); + } catch (IllegalAccessException e) { + e.printStackTrace(); + } catch (InvocationTargetException e) { + e.printStackTrace(); + } + }); + + ExecutionConfig exeConfig = env.getConfig(); + if(exeConfig.getGlobalJobParameters() == null){ + exeConfig.setGlobalJobParameters(globalJobParameters); + }else if(exeConfig.getGlobalJobParameters() instanceof Configuration){ + ((Configuration) exeConfig.getGlobalJobParameters()).addAll(globalJobParameters); + } + if(FlinkUtil.getMaxEnvParallelism(confProperties) > 0){ env.setMaxParallelism(FlinkUtil.getMaxEnvParallelism(confProperties)); diff --git a/core/src/main/java/com/dtstack/flink/sql/MyLocalStreamEnvironment.java b/core/src/main/java/com/dtstack/flink/sql/environment/MyLocalStreamEnvironment.java similarity index 95% rename from core/src/main/java/com/dtstack/flink/sql/MyLocalStreamEnvironment.java rename to core/src/main/java/com/dtstack/flink/sql/environment/MyLocalStreamEnvironment.java index 54ddaa647..c1cea1e14 100644 --- a/core/src/main/java/com/dtstack/flink/sql/MyLocalStreamEnvironment.java +++ b/core/src/main/java/com/dtstack/flink/sql/environment/MyLocalStreamEnvironment.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package com.dtstack.flink.sql; +package com.dtstack.flink.sql.environment; import org.apache.flink.api.common.InvalidProgramException; import org.apache.flink.api.common.JobExecutionResult; @@ -100,8 +100,8 @@ public JobExecutionResult execute(String jobName) throws Exception { Configuration configuration = new Configuration(); configuration.addAll(jobGraph.getJobConfiguration()); - configuration.setLong(TaskManagerOptions.MANAGED_MEMORY_SIZE, -1L); - configuration.setInteger(ConfigConstants.TASK_MANAGER_NUM_TASK_SLOTS, jobGraph.getMaximumParallelism()); + configuration.setString(TaskManagerOptions.MANAGED_MEMORY_SIZE, "512M"); + configuration.setInteger(TaskManagerOptions.NUM_TASK_SLOTS, jobGraph.getMaximumParallelism()); // add (and override) the settings with what the user defined configuration.addAll(this.conf); diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherOptionParser.java b/core/src/main/java/com/dtstack/flink/sql/options/LauncherOptionParser.java similarity index 94% rename from launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherOptionParser.java rename to core/src/main/java/com/dtstack/flink/sql/options/LauncherOptionParser.java index 75c5c4f0f..d4b4a19f2 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherOptionParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/options/LauncherOptionParser.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ -package com.dtstack.flink.sql.launcher; +package com.dtstack.flink.sql.options; import avro.shaded.com.google.common.collect.Lists; import org.apache.commons.cli.BasicParser; @@ -73,7 +73,7 @@ public class LauncherOptionParser { private LauncherOptions properties = new LauncherOptions(); - public LauncherOptionParser(String[] args) { + public LauncherOptionParser(String[] args) throws Exception { options.addOption(OPTION_MODE, true, "Running mode"); options.addOption(OPTION_SQL, true, "Job sql file"); options.addOption(OPTION_NAME, true, "Job name"); @@ -87,8 +87,6 @@ public LauncherOptionParser(String[] args) { options.addOption(OPTION_SAVE_POINT_PATH, true, "Savepoint restore path"); options.addOption(OPTION_ALLOW_NON_RESTORED_STATE, true, "Flag indicating whether non restored state is allowed if the savepoint"); options.addOption(OPTION_FLINK_JAR_PATH, true, "flink jar path for submit of perjob mode"); - - try { CommandLine cl = parser.parse(options, args); String mode = cl.getOptionValue(OPTION_MODE, ClusterMode.local.name()); //check mode @@ -101,21 +99,24 @@ public LauncherOptionParser(String[] args) { byte[] filecontent = new byte[(int) file.length()]; in.read(filecontent); String content = new String(filecontent, "UTF-8"); + String sql = URLEncoder.encode(content, Charsets.UTF_8.name()); properties.setSql(sql); + String localPlugin = Preconditions.checkNotNull(cl.getOptionValue(OPTION_LOCAL_SQL_PLUGIN_PATH)); properties.setLocalSqlPluginPath(localPlugin); + String remotePlugin = cl.getOptionValue(OPTION_REMOTE_SQL_PLUGIN_PATH); - if(!ClusterMode.local.name().equals(mode)){ - Preconditions.checkNotNull(remotePlugin); - properties.setRemoteSqlPluginPath(remotePlugin); - } + properties.setRemoteSqlPluginPath(remotePlugin); + String name = Preconditions.checkNotNull(cl.getOptionValue(OPTION_NAME)); properties.setName(name); + String addJar = cl.getOptionValue(OPTION_ADDJAR); if(StringUtils.isNotBlank(addJar)){ properties.setAddjar(addJar); } + String 
confProp = cl.getOptionValue(OPTION_CONF_PROP); if(StringUtils.isNotBlank(confProp)){ properties.setConfProp(confProp); @@ -144,10 +145,6 @@ public LauncherOptionParser(String[] args) { if(StringUtils.isNotBlank(flinkJarPath)){ properties.setFlinkJarPath(flinkJarPath); } - - } catch (Exception e) { - throw new RuntimeException(e); - } } public LauncherOptions getLauncherOptions(){ @@ -157,6 +154,7 @@ public LauncherOptions getLauncherOptions(){ public List getProgramExeArgList() throws Exception { Map mapConf = PluginUtil.ObjectToMap(properties); List args = Lists.newArrayList(); + for(Map.Entry one : mapConf.entrySet()){ String key = one.getKey(); if(OPTION_FLINK_CONF_DIR.equalsIgnoreCase(key) diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherOptions.java b/core/src/main/java/com/dtstack/flink/sql/options/LauncherOptions.java similarity index 94% rename from launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherOptions.java rename to core/src/main/java/com/dtstack/flink/sql/options/LauncherOptions.java index 5cc762ebd..d84637abc 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherOptions.java +++ b/core/src/main/java/com/dtstack/flink/sql/options/LauncherOptions.java @@ -16,7 +16,10 @@ * limitations under the License. 
*/ -package com.dtstack.flink.sql.launcher; +package com.dtstack.flink.sql.options; + +import com.dtstack.flink.sql.ClusterMode; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; /** * This class define commandline options for the Launcher program @@ -26,7 +29,7 @@ */ public class LauncherOptions { - private String mode; + private String mode = ClusterMode.local.name(); private String name; @@ -156,4 +159,5 @@ public String getQueue() { public void setQueue(String queue) { this.queue = queue; } + } diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java index 9b7017743..793dd6baa 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java @@ -24,7 +24,7 @@ import java.util.regex.Pattern; /** - * 解析创建自定义方法sql + * parser register udf sql * Date: 2018/6/26 * Company: www.dtstack.com * @author xuchao diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java index d10d6825e..5e126e786 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java @@ -29,7 +29,7 @@ import java.util.regex.Pattern; /** - * 解析创建表结构sql + * parser create table sql * Date: 2018/6/26 * Company: www.dtstack.com * @author xuchao @@ -54,7 +54,7 @@ public boolean verify(String sql) { public void parseSql(String sql, SqlTree sqlTree) { Matcher matcher = PATTERN.matcher(sql); if(matcher.find()){ - String tableName = matcher.group(1).toUpperCase(); + String tableName = matcher.group(1); String fieldsInfoStr = matcher.group(2); String propsStr = matcher.group(3); Map props = parseProp(propsStr); diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java 
b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java index 9bd1374a0..db18986b7 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java @@ -20,6 +20,8 @@ package com.dtstack.flink.sql.parser; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.calcite.config.Lex; import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; @@ -31,6 +33,12 @@ import static org.apache.calcite.sql.SqlKind.IDENTIFIER; +/** + * parser create tmp table sql + * Date: 2018/6/26 + * Company: www.dtstack.com + * @author yanxi + */ public class CreateTmpTableParser implements IParser { //select table tableName as select @@ -61,11 +69,16 @@ public void parseSql(String sql, SqlTree sqlTree) { String tableName = null; String selectSql = null; if(matcher.find()) { - tableName = matcher.group(1).toUpperCase(); + tableName = matcher.group(1); selectSql = "select " + matcher.group(2); } - SqlParser sqlParser = SqlParser.create(selectSql); + SqlParser.Config config = SqlParser + .configBuilder() + .setLex(Lex.MYSQL) + .build(); + SqlParser sqlParser = SqlParser.create(selectSql,config); + SqlNode sqlNode = null; try { sqlNode = sqlParser.parseStmt(); @@ -77,7 +90,8 @@ public void parseSql(String sql, SqlTree sqlTree) { parseNode(sqlNode, sqlParseResult); sqlParseResult.setTableName(tableName); - sqlParseResult.setExecSql(selectSql.toUpperCase()); + String transformSelectSql = DtStringUtil.replaceIgnoreQuota(sqlNode.toString(), "`", ""); + sqlParseResult.setExecSql(transformSelectSql); sqlTree.addTmpSql(sqlParseResult); sqlTree.addTmplTableInfo(tableName, sqlParseResult); } else { @@ -87,7 +101,7 @@ public void parseSql(String sql, SqlTree sqlTree) { String tableName = null; String fieldsInfoStr = null; if (matcher.find()){ - tableName = matcher.group(1).toUpperCase(); + 
tableName = matcher.group(1); fieldsInfoStr = matcher.group(2); } CreateTmpTableParser.SqlParserResult sqlParseResult = new CreateTmpTableParser.SqlParserResult(); diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java index e08540c92..40629b139 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java @@ -20,12 +20,8 @@ package com.dtstack.flink.sql.parser; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlInsert; -import org.apache.calcite.sql.SqlJoin; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.config.Lex; +import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.commons.lang3.StringUtils; @@ -36,8 +32,7 @@ import static org.apache.calcite.sql.SqlKind.IDENTIFIER; /** - * 解析flink sql - * sql 只支持 insert 开头的 + * parser flink sql * Date: 2018/6/22 * Company: www.dtstack.com * @author xuchao @@ -57,7 +52,11 @@ public static InsertSqlParser newInstance(){ @Override public void parseSql(String sql, SqlTree sqlTree) { - SqlParser sqlParser = SqlParser.create(sql); + SqlParser.Config config = SqlParser + .configBuilder() + .setLex(Lex.MYSQL) + .build(); + SqlParser sqlParser = SqlParser.create(sql,config); SqlNode sqlNode = null; try { sqlNode = sqlParser.parseStmt(); @@ -113,6 +112,10 @@ private static void parseNode(SqlNode sqlNode, SqlParseResult sqlParseResult){ sqlParseResult.addSourceTable(identifierNode.toString()); } break; + case MATCH_RECOGNIZE: + SqlMatchRecognize node = (SqlMatchRecognize) sqlNode; + sqlParseResult.addSourceTable(node.getTableRef().toString()); + break; case UNION: SqlNode unionLeft = 
((SqlBasicCall)sqlNode).getOperands()[0]; SqlNode unionRight = ((SqlBasicCall)sqlNode).getOperands()[1]; diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java b/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java index 3ed37c51e..754de0819 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java @@ -29,7 +29,7 @@ import java.util.Map; /** - * 解析sql获得的对象结构 + * parser sql to get the Sql Tree structure * Date: 2018/6/25 * Company: www.dtstack.com * @author xuchao diff --git a/core/src/main/java/com/dtstack/flink/sql/side/AllReqRow.java b/core/src/main/java/com/dtstack/flink/sql/side/AllReqRow.java index 303260742..a185da1bd 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/AllReqRow.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/AllReqRow.java @@ -37,7 +37,7 @@ * @author xuchao */ -public abstract class AllReqRow extends RichFlatMapFunction{ +public abstract class AllReqRow extends RichFlatMapFunction implements ISideReqRow { protected SideInfo sideInfo; @@ -48,8 +48,6 @@ public AllReqRow(SideInfo sideInfo){ } - protected abstract Row fillData(Row input, Object sideInput); - protected abstract void initCache() throws SQLException; protected abstract void reloadCache(); diff --git a/core/src/main/java/com/dtstack/flink/sql/side/AsyncReqRow.java b/core/src/main/java/com/dtstack/flink/sql/side/AsyncReqRow.java index 6617bb29f..6df1af1d5 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/AsyncReqRow.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/AsyncReqRow.java @@ -40,7 +40,7 @@ * @author xuchao */ -public abstract class AsyncReqRow extends RichAsyncFunction { +public abstract class AsyncReqRow extends RichAsyncFunction implements ISideReqRow { private static final long serialVersionUID = 2098635244857937717L; @@ -79,9 +79,6 @@ protected boolean openCache(){ return sideInfo.getSideCache() != null; } - - protected abstract 
Row fillData(Row input, Object sideInput); - protected void dealMissKey(Row input, ResultFuture resultFuture){ if(sideInfo.getJoinType() == JoinType.LEFT){ //Reserved left table data diff --git a/core/src/main/java/com/dtstack/flink/sql/side/ISideReqRow.java b/core/src/main/java/com/dtstack/flink/sql/side/ISideReqRow.java new file mode 100644 index 000000000..88066e37f --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/side/ISideReqRow.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.side; + +import org.apache.flink.types.Row; + +/** + * + * Date: 2018/12/4 + * Company: www.dtstack.com + * @author xuchao + */ +public interface ISideReqRow { + + Row fillData(Row input, Object sideInput); + +} diff --git a/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java b/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java index 7f165ac30..b412b1d18 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.side; -import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.calcite.config.Lex; import org.apache.calcite.sql.JoinType; import org.apache.calcite.sql.SqlAsOperator; import org.apache.calcite.sql.SqlBasicCall; @@ -52,11 +52,14 @@ public class SideSQLParser { public Queue getExeQueue(String exeSql, Set sideTableSet) throws SqlParseException { - exeSql = DtStringUtil.replaceIgnoreQuota(exeSql, "`", ""); System.out.println("---exeSql---"); System.out.println(exeSql); Queue queueInfo = Queues.newLinkedBlockingQueue(); - SqlParser sqlParser = SqlParser.create(exeSql); + SqlParser.Config config = SqlParser + .configBuilder() + .setLex(Lex.MYSQL) + .build(); + SqlParser sqlParser = SqlParser.create(exeSql,config); SqlNode sqlNode = sqlParser.parseStmt(); parseSql(sqlNode, sideTableSet, queueInfo); queueInfo.offer(sqlNode); diff --git a/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java b/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java index 080d3d48b..5e5534438 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java @@ -67,6 +67,8 @@ public class SideSqlExec { private SideSQLParser sideSQLParser = new SideSQLParser(); + private Map localTableCache = Maps.newHashMap(); + public void exec(String sql, Map sideTableMap, StreamTableEnvironment 
tableEnv, Map tableCache) throws Exception { @@ -75,7 +77,7 @@ public void exec(String sql, Map sideTableMap, StreamTabl throw new RuntimeException("need to set localSqlPluginPath"); } - Map localTableCache = Maps.newHashMap(tableCache); + localTableCache.putAll(tableCache); Queue exeQueue = sideSQLParser.getExeQueue(sql, sideTableMap.keySet()); Object pollObj = null; @@ -389,6 +391,7 @@ private SqlNode replaceSelectFieldName(SqlNode selectNode, HashBasedTable localTableCache = Maps.newHashMap(tableCache); + localTableCache.putAll(tableCache); Queue exeQueue = sideSQLParser.getExeQueue(result.getExecSql(), sideTableMap.keySet()); Object pollObj = null; @@ -540,6 +543,7 @@ public void registerTmpTable(CreateTmpTableParser.SqlParserResult result, throw new RuntimeException("Fields mismatch"); } } + localTableCache.put(result.getTableName(), table); } @@ -642,7 +646,7 @@ private boolean checkFieldsInfo(CreateTmpTableParser.SqlParserResult result, Tab String[] filedNameArr = new String[filed.length - 1]; System.arraycopy(filed, 0, filedNameArr, 0, filed.length - 1); String fieldName = String.join(" ", filedNameArr); - fieldNames.add(fieldName.toUpperCase()); + fieldNames.add(fieldName); String fieldType = filed[filed.length - 1 ].trim(); Class fieldClass = ClassUtil.stringConvertClass(fieldType); Class tableField = table.getSchema().getType(i).get().getTypeClass(); diff --git a/core/src/main/java/com/dtstack/flink/sql/side/SideTableInfo.java b/core/src/main/java/com/dtstack/flink/sql/side/SideTableInfo.java index 0abd55a92..e21389ea7 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/SideTableInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/SideTableInfo.java @@ -45,14 +45,26 @@ public abstract class SideTableInfo extends TableInfo implements Serializable { public static final String PARTITIONED_JOIN_KEY = "partitionedJoin"; + public static final String CACHE_MODE_KEY = "cacheMode"; + + public static final String ASYNC_CAP_KEY = "asyncCapacity"; + 
+ public static final String ASYNC_TIMEOUT_KEY = "asyncTimeout"; + private String cacheType = "none";//None or LRU or ALL private int cacheSize = 10000; private long cacheTimeout = 60 * 1000;// + private int asyncCapacity=100; + + private int asyncTimeout=10000; + private boolean partitionedJoin = false; + private String cacheMode="ordered"; + public RowTypeInfo getRowTypeInfo(){ Class[] fieldClass = getFieldClasses(); TypeInformation[] types = new TypeInformation[fieldClass.length]; @@ -95,4 +107,28 @@ public boolean isPartitionedJoin() { public void setPartitionedJoin(boolean partitionedJoin) { this.partitionedJoin = partitionedJoin; } + + public String getCacheMode() { + return cacheMode; + } + + public void setCacheMode(String cacheMode) { + this.cacheMode = cacheMode; + } + + public int getAsyncCapacity() { + return asyncCapacity; + } + + public void setAsyncCapacity(int asyncCapacity) { + this.asyncCapacity = asyncCapacity; + } + + public int getAsyncTimeout() { + return asyncTimeout; + } + + public void setAsyncTimeout(int asyncTimeout) { + this.asyncTimeout = asyncTimeout; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java index df9a1c175..688a19c35 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java @@ -44,8 +44,10 @@ public class SideAsyncOperator { private static final String PATH_FORMAT = "%sasyncside"; - //TODO need to set by create table task - private static int asyncCapacity = 100; + private static final String OPERATOR_TYPE = "Async"; + + private static final String ORDERED = "ordered"; + private static AsyncReqRow loadAsyncReq(String sideType, String sqlRootDir, RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) throws Exception { @@ -54,7 +56,7 @@ private 
static AsyncReqRow loadAsyncReq(String sideType, String sqlRootDir, RowT String pluginJarPath = PluginUtil.getJarFileDirPath(pathOfType, sqlRootDir); DtClassLoader dtClassLoader = (DtClassLoader) classLoader; PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); - String className = PluginUtil.getSqlSideClassName(sideType, "side", "Async"); + String className = PluginUtil.getSqlSideClassName(sideType, "side", OPERATOR_TYPE); return dtClassLoader.loadClass(className).asSubclass(AsyncReqRow.class) .getConstructor(RowTypeInfo.class, JoinInfo.class, List.class, SideTableInfo.class).newInstance(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); } @@ -62,8 +64,15 @@ private static AsyncReqRow loadAsyncReq(String sideType, String sqlRootDir, RowT public static DataStream getSideJoinDataStream(DataStream inputStream, String sideType, String sqlRootDir, RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) throws Exception { AsyncReqRow asyncDbReq = loadAsyncReq(sideType, sqlRootDir, rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + //TODO How much should be set for the degree of parallelism? Timeout? capacity settings? 
- return AsyncDataStream.orderedWait(inputStream, asyncDbReq, 10000, TimeUnit.MILLISECONDS, asyncCapacity) - .setParallelism(sideTableInfo.getParallelism()); + if (ORDERED.equals(sideTableInfo.getCacheMode())){ + return AsyncDataStream.orderedWait(inputStream, asyncDbReq, sideTableInfo.getAsyncTimeout(), TimeUnit.MILLISECONDS, sideTableInfo.getAsyncCapacity()) + .setParallelism(sideTableInfo.getParallelism()); + }else { + return AsyncDataStream.unorderedWait(inputStream, asyncDbReq, sideTableInfo.getAsyncTimeout(), TimeUnit.MILLISECONDS, sideTableInfo.getAsyncCapacity()) + .setParallelism(sideTableInfo.getParallelism()); + } + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java index 72a67d00b..725798848 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java @@ -43,6 +43,8 @@ public class SideWithAllCacheOperator { private static final String PATH_FORMAT = "%sallside"; + private static final String OPERATOR_TYPE = "All"; + private static AllReqRow loadFlatMap(String sideType, String sqlRootDir, RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) throws Exception { @@ -53,7 +55,7 @@ private static AllReqRow loadFlatMap(String sideType, String sqlRootDir, RowType DtClassLoader dtClassLoader = (DtClassLoader) classLoader; PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); - String className = PluginUtil.getSqlSideClassName(sideType, "side", "All"); + String className = PluginUtil.getSqlSideClassName(sideType, "side", OPERATOR_TYPE); return dtClassLoader.loadClass(className).asSubclass(AllReqRow.class).getConstructor(RowTypeInfo.class, JoinInfo.class, List.class, SideTableInfo.class) .newInstance(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); 
diff --git a/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java b/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java index 9ef30f97c..e072a2345 100644 --- a/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java +++ b/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java @@ -51,9 +51,9 @@ public static AbsTableParser getSqlParser(String pluginType, String sqlRootDir) DtClassLoader dtClassLoader = (DtClassLoader) classLoader; String pluginJarPath = PluginUtil.getJarFileDirPath(String.format(DIR_NAME_FORMAT, pluginType), sqlRootDir); - PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); - String className = PluginUtil.getSqlParserClassName(pluginType, CURR_TYPE); + String typeNoVersion = DtStringUtil.getPluginTypeWithoutVersion(pluginType); + String className = PluginUtil.getSqlParserClassName(typeNoVersion, CURR_TYPE); Class targetParser = dtClassLoader.loadClass(className); if(!AbsTableParser.class.isAssignableFrom(targetParser)){ @@ -76,8 +76,9 @@ public static TableSink getTableSink(TargetTableInfo targetTableInfo, String loc String pluginJarDirPath = PluginUtil.getJarFileDirPath(String.format(DIR_NAME_FORMAT, pluginType), localSqlRootDir); PluginUtil.addPluginJar(pluginJarDirPath, dtClassLoader); + String typeNoVersion = DtStringUtil.getPluginTypeWithoutVersion(pluginType); - String className = PluginUtil.getGenerClassName(pluginType, CURR_TYPE); + String className = PluginUtil.getGenerClassName(typeNoVersion, CURR_TYPE); Class sinkClass = dtClassLoader.loadClass(className); if(!IStreamSinkGener.class.isAssignableFrom(sinkClass)){ diff --git a/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java b/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java index f8ede801b..ae8135256 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java @@ -16,13 +16,14 @@ * 
limitations under the License. */ - + package com.dtstack.flink.sql.table; import com.dtstack.flink.sql.enums.ECacheType; import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.util.MathUtil; +import org.apache.commons.lang3.StringUtils; import java.util.Map; import java.util.regex.Matcher; @@ -85,6 +86,31 @@ protected void parseCacheProp(SideTableInfo sideTableInfo, Map p sideTableInfo.setPartitionedJoin(true); } } + + if(props.containsKey(SideTableInfo.CACHE_MODE_KEY.toLowerCase())){ + String cachemode = MathUtil.getString(props.get(SideTableInfo.CACHE_MODE_KEY.toLowerCase())); + + if(!cachemode.equalsIgnoreCase("ordered") && !cachemode.equalsIgnoreCase("unordered")){ + throw new RuntimeException("cachemode must ordered or unordered!"); + } + sideTableInfo.setCacheMode(cachemode.toLowerCase()); + } + + if(props.containsKey(SideTableInfo.ASYNC_CAP_KEY.toLowerCase())){ + Integer asyncCap = MathUtil.getIntegerVal(props.get(SideTableInfo.ASYNC_CAP_KEY.toLowerCase())); + if(asyncCap < 0){ + throw new RuntimeException("asyncCapacity size need > 0."); + } + sideTableInfo.setAsyncCapacity(asyncCap); + } + + if(props.containsKey(SideTableInfo.ASYNC_TIMEOUT_KEY.toLowerCase())){ + Integer asyncTimeout = MathUtil.getIntegerVal(props.get(SideTableInfo.ASYNC_TIMEOUT_KEY.toLowerCase())); + if (asyncTimeout<0){ + throw new RuntimeException("asyncTimeout size need > 0."); + } + sideTableInfo.setAsyncTimeout(asyncTimeout); + } } } } diff --git a/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java b/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java index 2fc2d799b..4d1625b18 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java @@ -82,9 +82,6 @@ public void parseFieldsInfo(String fieldsInfo, TableInfo tableInfo){ String[] fieldRows = DtStringUtil.splitIgnoreQuotaBrackets(fieldsInfo, ","); for(String fieldRow : fieldRows){ fieldRow 
= fieldRow.trim(); - if(fieldNameNeedsUpperCase()) { - fieldRow = fieldRow.toUpperCase(); - } boolean isMatcherKey = dealKeyPattern(fieldRow, tableInfo); @@ -113,7 +110,7 @@ public void parseFieldsInfo(String fieldsInfo, TableInfo tableInfo){ } public static void dealPrimaryKey(Matcher matcher, TableInfo tableInfo){ - String primaryFields = matcher.group(1); + String primaryFields = matcher.group(1).trim(); String[] splitArry = primaryFields.split(","); List primaryKes = Lists.newArrayList(splitArry); tableInfo.setPrimaryKeys(primaryKes); diff --git a/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java b/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java index ed342e002..ebe675c75 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java @@ -20,10 +20,14 @@ package com.dtstack.flink.sql.table; +import com.google.common.collect.Lists; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; import org.apache.flink.calcite.shaded.com.google.common.base.Strings; import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import java.util.ArrayList; import java.util.Map; +import java.util.TimeZone; /** * Reason: @@ -36,6 +40,14 @@ public abstract class SourceTableInfo extends TableInfo { public static final String SOURCE_SUFFIX = "Source"; + public static final String SOURCE_DATA_TYPE = "sourcedatatype"; + + public static final String TIME_ZONE_KEY="timezone"; + + private String sourceDataType = "json"; + + private String timeZone="Asia/Shanghai"; + private String eventTimeField; private Integer maxOutOrderness = 10; @@ -101,4 +113,33 @@ public String getAdaptSelectSql(){ public String getAdaptName(){ return getName() + "_adapt"; } + + public String getSourceDataType() { + return sourceDataType; + } + + public void setSourceDataType(String sourceDataType) { + this.sourceDataType = sourceDataType; + } + + 
public String getTimeZone() { + return timeZone; + } + + public void setTimeZone(String timeZone) { + if (timeZone==null){ + return; + } + timeZoneCheck(timeZone); + this.timeZone = timeZone; + } + + private void timeZoneCheck(String timeZone) { + ArrayList zones = Lists.newArrayList(TimeZone.getAvailableIDs()); + if (!zones.contains(timeZone)){ + throw new IllegalArgumentException(" timezone is Incorrect!"); + } + } + + } diff --git a/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java b/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java index 71bea466f..d57b3061a 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java @@ -22,6 +22,7 @@ import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import java.io.Serializable; import java.util.List; /** @@ -31,10 +32,14 @@ * @author xuchao */ -public abstract class TableInfo { +public abstract class TableInfo implements Serializable { public static final String PARALLELISM_KEY = "parallelism"; + public static final String FIELD_DELINITER = "fielddelimiter"; + + public static final String LENGTH_CHECK_POLICY = "lengthcheckpolicy"; + private String name; private String type; @@ -55,6 +60,10 @@ public abstract class TableInfo { private Integer parallelism = 1; + private String fieldDelimiter; + + private String lengthCheckPolicy = "SKIP"; + public String[] getFieldTypes() { return fieldTypes; } @@ -121,6 +130,47 @@ public void addFieldType(String fieldType){ fieldTypeList.add(fieldType); } + public void setFields(String[] fields) { + this.fields = fields; + } + + public void setFieldTypes(String[] fieldTypes) { + this.fieldTypes = fieldTypes; + } + + public void setFieldClasses(Class[] fieldClasses) { + this.fieldClasses = fieldClasses; + } + + public List getFieldList() { + return fieldList; + } + + public List getFieldTypeList() { + return fieldTypeList; + } + + public List getFieldClassList() { + 
return fieldClassList; + } + + + + public String getFieldDelimiter() { + return fieldDelimiter; + } + + public void setFieldDelimiter(String fieldDelimiter) { + this.fieldDelimiter = fieldDelimiter; + } + + public String getLengthCheckPolicy() { + return lengthCheckPolicy; + } + + public void setLengthCheckPolicy(String lengthCheckPolicy) { + this.lengthCheckPolicy = lengthCheckPolicy; + } public void finish(){ this.fields = fieldList.toArray(new String[fieldList.size()]); diff --git a/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java b/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java index b3a07d6d5..4c2f67f68 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java @@ -20,7 +20,6 @@ package com.dtstack.flink.sql.table; -import com.dtstack.flink.sql.enums.ECacheType; import com.dtstack.flink.sql.enums.ETableType; import com.dtstack.flink.sql.parser.CreateTableParser; import com.dtstack.flink.sql.side.SideTableInfo; diff --git a/core/src/main/java/com/dtstack/flink/sql/table/TargetTableInfo.java b/core/src/main/java/com/dtstack/flink/sql/table/TargetTableInfo.java index a9a846707..70b625b5c 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/TargetTableInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/TargetTableInfo.java @@ -30,4 +30,16 @@ public abstract class TargetTableInfo extends TableInfo { public static final String TARGET_SUFFIX = "Sink"; + + public static final String SINK_DATA_TYPE = "sinkdatatype"; + + private String sinkDataType = "json"; + + public String getSinkDataType() { + return sinkDataType; + } + + public void setSinkDataType(String sinkDataType) { + this.sinkDataType = sinkDataType; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java b/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java index 959d4b13c..96026ca4b 100644 --- 
a/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java +++ b/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java @@ -16,7 +16,7 @@ * limitations under the License. */ - + package com.dtstack.flink.sql.util; @@ -25,7 +25,7 @@ import org.apache.flink.calcite.shaded.com.google.common.base.Strings; import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; - +import java.sql.Timestamp; import java.math.BigDecimal; import java.util.ArrayList; import java.util.List; @@ -225,4 +225,27 @@ public static boolean isJosn(String str){ } return flag; } + + public static Object parse(String str,Class clazz){ + String fieldType = clazz.getName(); + Object object = null; + if(fieldType.equals(Integer.class.getName())){ + object = Integer.parseInt(str); + }else if(fieldType.equals(Long.class.getName())){ + object = Long.parseLong(str); + }else if(fieldType.equals(Byte.class.getName())){ + object = str.getBytes()[0]; + }else if(fieldType.equals(String.class.getName())){ + object = str; + }else if(fieldType.equals(Float.class.getName())){ + object = Float.parseFloat(str); + }else if(fieldType.equals(Double.class.getName())){ + object = Double.parseDouble(str); + }else if (fieldType.equals(Timestamp.class.getName())){ + object = Timestamp.valueOf(str); + }else{ + throw new RuntimeException("no support field type for sql. the input type:" + fieldType); + } + return object; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForLong.java b/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForLong.java index 0a6874a81..632c10c7b 100644 --- a/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForLong.java +++ b/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForLong.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ - + package com.dtstack.flink.sql.watermarker; @@ -27,6 +27,8 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.util.TimeZone; + /** * Custom watermark --- for eventtime * Date: 2017/12/28 @@ -40,13 +42,16 @@ public class CustomerWaterMarkerForLong extends AbsCustomerWaterMarker { private static final long serialVersionUID = 1L; + private TimeZone timezone; + private int pos; private long lastTime = 0; - public CustomerWaterMarkerForLong(Time maxOutOfOrderness, int pos) { + public CustomerWaterMarkerForLong(Time maxOutOfOrderness, int pos,String timezone) { super(maxOutOfOrderness); this.pos = pos; + this.timezone= TimeZone.getTimeZone(timezone); } @Override @@ -54,13 +59,21 @@ public long extractTimestamp(Row row) { try{ Long eveTime = MathUtil.getLongVal(row.getField(pos)); - lastTime = eveTime; - eventDelayGauge.setDelayTime(MathUtil.getIntegerVal((System.currentTimeMillis() - eveTime)/1000)); - return eveTime; + Long extractTime=eveTime; + + lastTime = extractTime + timezone.getOffset(extractTime); + + eventDelayGauge.setDelayTime(MathUtil.getIntegerVal((System.currentTimeMillis() - convertTimeZone(extractTime))/1000)); + + return lastTime; }catch (Exception e){ logger.error("", e); } - return lastTime; } + + public long convertTimeZone(long evenTime){ + long res = evenTime - timezone.getOffset(evenTime) + TimeZone.getDefault().getOffset(evenTime); + return res; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForTimeStamp.java b/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForTimeStamp.java index b4aee0302..e4af63744 100644 --- a/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForTimeStamp.java +++ b/core/src/main/java/com/dtstack/flink/sql/watermarker/CustomerWaterMarkerForTimeStamp.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ - + package com.dtstack.flink.sql.watermarker; @@ -27,6 +27,7 @@ import org.slf4j.LoggerFactory; import java.sql.Timestamp; +import java.util.TimeZone; /** * Custom watermark --- for eventtime @@ -45,25 +46,35 @@ public class CustomerWaterMarkerForTimeStamp extends AbsCustomerWaterMarker private long lastTime = 0; + private TimeZone timezone; + - public CustomerWaterMarkerForTimeStamp(Time maxOutOfOrderness, int pos) { + public CustomerWaterMarkerForTimeStamp(Time maxOutOfOrderness, int pos,String timezone) { super(maxOutOfOrderness); this.pos = pos; + this.timezone= TimeZone.getTimeZone(timezone); } @Override public long extractTimestamp(Row row) { try { Timestamp time = (Timestamp) row.getField(pos); - lastTime = time.getTime(); - eventDelayGauge.setDelayTime(MathUtil.getIntegerVal((System.currentTimeMillis() - time.getTime())/1000)); - return time.getTime(); + long extractTime=time.getTime(); + + lastTime = extractTime + timezone.getOffset(extractTime); + + eventDelayGauge.setDelayTime(MathUtil.getIntegerVal((System.currentTimeMillis() - convertTimeZone(extractTime))/1000)); + + return lastTime; } catch (RuntimeException e) { logger.error("", e); } return lastTime; } - + public long convertTimeZone(long evenTime){ + long res = evenTime - timezone.getOffset(evenTime) + TimeZone.getDefault().getOffset(evenTime); + return res; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java b/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java index a29e8391b..4f386d75f 100644 --- a/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java +++ b/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java @@ -21,6 +21,7 @@ package com.dtstack.flink.sql.watermarker; import com.dtstack.flink.sql.table.SourceTableInfo; +import com.google.common.collect.Lists; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import 
org.apache.flink.api.java.typeutils.RowTypeInfo; @@ -31,6 +32,8 @@ import org.apache.flink.types.Row; import org.apache.flink.util.Preconditions; +import java.util.TimeZone; + /** * define watermarker * Date: 2018/6/29 @@ -54,6 +57,8 @@ public DataStream assignWaterMarker(DataStream dataStream, RowTypeInfo type int maxOutOrderness = sourceTableInfo.getMaxOutOrderness(); + String timeZone=sourceTableInfo.getTimeZone(); + String[] fieldNames = typeInfo.getFieldNames(); TypeInformation[] fieldTypes = typeInfo.getFieldTypes(); @@ -75,9 +80,9 @@ public DataStream assignWaterMarker(DataStream dataStream, RowTypeInfo type AbsCustomerWaterMarker waterMarker = null; if(fieldType.getTypeClass().getTypeName().equalsIgnoreCase("java.sql.Timestamp")){ - waterMarker = new CustomerWaterMarkerForTimeStamp(Time.milliseconds(maxOutOrderness), pos); + waterMarker = new CustomerWaterMarkerForTimeStamp(Time.milliseconds(maxOutOrderness), pos,timeZone); }else if(fieldType.getTypeClass().getTypeName().equalsIgnoreCase("java.lang.Long")){ - waterMarker = new CustomerWaterMarkerForLong(Time.milliseconds(maxOutOrderness), pos); + waterMarker = new CustomerWaterMarkerForLong(Time.milliseconds(maxOutOrderness), pos,timeZone); }else{ throw new IllegalArgumentException("not support type of " + fieldType + ", current only support(timestamp, long)."); } diff --git a/core/src/main/scala/com/dtstack/flink/App.scala b/core/src/main/scala/com/dtstack/flink/App.scala new file mode 100644 index 000000000..e74ccc28a --- /dev/null +++ b/core/src/main/scala/com/dtstack/flink/App.scala @@ -0,0 +1,11 @@ +package com.dtstack.flink + +/** + * Hello world! + * + */ +object App { + def main(args: Array[String]): Unit = { + println( "Hello World!" 
) + } +} diff --git a/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java b/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java index 8eb09ee18..5abb66e2b 100644 --- a/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java +++ b/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java @@ -41,6 +41,51 @@ public class SideSqlExecTest { + /** + * 参考阿里Blink的cep sql语法,文档https://help.aliyun.com/document_detail/73845.html?spm=a2c4g.11186623.6.637.5cba27efFHjOSs + * @throws Exception + */ + @Test + public void testCepSql() throws Exception { + List paramList = Lists.newArrayList(); + paramList.add("-sql"); + String sqlContext = "CREATE table source(" + + "name varchar, " + + "price float, " + + "tax float, " + + "tstamp timestamp) " + + "with (" + + " type = 'kafka09',bootstrapServers = 'kudu1:9092',zookeeperQuorum = '172.16.8.107:2181/kafka', offsetReset = 'latest',topic = 'tranflow_input',parallelism = '1' " + + ");" + + "CREATE table sink(" + + "start_tstamp timestamp, " + + "bottom_tstamp timestamp, " + + "end_tstamp timestamp, " + + "bottom_total float, " + + "end_total float ) " + + "with (" + + " type = 'mysql',url = 'jdbc:mysql://172.16.8.104:3306/bank_test?charset=utf8',userName = 'dtstack',password = 'abc123',tableName = 'max_deposit_acct_base',cache = 'LRU',cacheSize = '10000',cacheTTLMs = '60000',parallelism = '1' " + + ");" + + "insert into sink " + + "select * from source " + + "MATCH_RECOGNIZE (\n" + + " MEASURES\n" + + " STRT.tstamp AS start_tstamp,\n" + + " LAST(DOWN.tstamp) AS bottom_tstamp,\n" + + " LAST(UP.tstamp) AS end_tstamp,\n" + + " FIRST(DOWN.price + DOWN.tax + 1) AS bottom_total,\n" + + " FIRST(UP.price + UP.tax) AS end_total" + + " ONE ROW PER MATCH\n" + + " PATTERN (STRT DOWN+ UP+)\n" + + " DEFINE\n" + + " DOWN AS DOWN.price < PREV(DOWN.price),\n" + + " UP AS UP.price > PREV(UP.price) AND UP.tax > LAST(DOWN.tax)\n" + + ") AS T" + ; + test(sqlContext); + } + + @Test public void testRunSideSql() 
throws Exception { //String runParam = "-sql CREATE+TABLE+MyTable(channel+STRING%2c+pv+INT%2c+xctime+bigint%2c+timeLeng+as+CHARACTER_LENGTH(channel)%2c++WATERMARK+FOR+xctime+AS+withOffset(xctime%2c+1000))+WITH+(+type%3d%27kafka09%27%2c+bootstrapServers%3d%27172.16.8.198%3a9092%27%2c+offsetReset%3d%27latest%27%2ctopic%3d%27nbTest1%27)%3bCREATE+TABLE+MyResult(channel+STRING%2c+pv+INT)+WITH+(+type%3d%27mysql%27%2c+url%3d%27jdbc%3amysql%3a%2f%2f172.16.8.104%3a3306%2ftest%3fcharset%3dutf8%27%2cuserName%3d%27dtstack%27%2cpassword%3d%27abc123%27%2c+tableName%3d%27pv%27)%3bcreate+table+sideTable(channel+String%2c+count+int%2c+PERIOD+FOR+SYSTEM_TIME)+WITH+(+type%3d%27mysql%27%2c+url%3d%27jdbc%3amysql%3a%2f%2f172.16.8.104%3a3306%2ftest%3fcharset%3dutf8%27%2cuserName%3d%27dtstack%27%2cpassword%3d%27abc123%27%2c+tableName%3d%27pv%27)%3binsert+into+MyResult+select+a.channel%2cb.pv+from+MyTable+a+join+sideTable+b+on+a.channel%3db.channel%3b -name xc -localSqlPluginPath D:\\gitspace\\flink-sql-plugin\\plugins -mode local -remoteSqlPluginPath /opt/dtstack/flinkplugin -confProp %7b%22time.characteristic%22%3a%22EventTime%22%7d -addjar %5b%22D%3a%5c%5cgitspace%5c%5crdos-execution-engine%5c%5c..%5c%5ctmp140%5c%5cflink14Test-1.0-SNAPSHOT.jar%22%5d"; diff --git a/docs/cassandraSide.md b/docs/cassandraSide.md new file mode 100644 index 000000000..131560047 --- /dev/null +++ b/docs/cassandraSide.md @@ -0,0 +1,85 @@ + +## 1.格式: +``` + CREATE TABLE tableName( + colName cloType, + ... 
+ PRIMARY KEY(keyInfo), + PERIOD FOR SYSTEM_TIME + )WITH( + type ='cassandra', + address ='ip:port[,ip:port]', + userName='dbUserName', + password='dbPwd', + tableName='tableName', + database='database', + cache ='LRU', + cacheSize ='10000', + cacheTTLMs ='60000', + parallelism ='1', + partitionedJoin='false' + ); +``` + +## 2.支持版本 + cassandra-3.6.x + +## 3.表结构定义 + + |参数名称|含义| + |----|---| + | tableName | 注册到flink的表名称(可选填;不填默认和cassandra对应的表名称相同)| + | colName | 列名称| + | colType | 列类型 [colType支持的类型](colType.md)| + | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| + | PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开| + +## 4.参数 + + |参数名称|含义|是否必填|默认值| + |----|---|---|----| + | type |表明维表类型 cassandra|是|| + | address | 连接cassandra数据库的地址(ip:port,多个之间用逗号隔开)|是|| + | userName | cassandra连接用户名|否|| + | password | cassandra连接密码|否|| + | tableName | cassandra表名称|是|| + | database | cassandra数据库名称|是|| + | cache | 维表缓存策略(NONE/LRU)|否|NONE| + | partitionedJoin | 是否在维表join之前先根据设定的key做一次keyby操作(可以减少维表的数据缓存量)|否|false| + | maxRequestsPerConnection | 每个连接最多允许64个并发请求|否|NONE| + | coreConnectionsPerHost | 和Cassandra集群里的每个机器都至少有2个连接|否|NONE| + | maxConnectionsPerHost | 和Cassandra集群里的每个机器都最多有6个连接|否|NONE| + | maxQueueSize | Cassandra队列大小|否|NONE| + | readTimeoutMillis | Cassandra读超时|否|NONE| + | connectTimeoutMillis | Cassandra连接超时|否|NONE| + | poolTimeoutMillis | Cassandra线程池超时|否|NONE| + + ---------- + > 缓存策略 + * NONE: 不做内存缓存 + * LRU: + * cacheSize: 缓存的条目数量 + * cacheTTLMs:缓存的过期时间(ms) + + +## 5.样例 +``` +create table sideTable( + CHANNEL varchar, + XCCOUNT int, + PRIMARY KEY(channel), + PERIOD FOR SYSTEM_TIME + )WITH( + type ='cassandra', + address ='172.21.32.1:9042,172.21.32.1:9042', + database ='test', + tableName ='sidetest', + cache ='LRU', + parallelism ='1', + partitionedJoin='false' + ); + + +``` + + diff --git a/docs/cassandraSink.md b/docs/cassandraSink.md new file mode 100644 index 000000000..8ea38e104 --- /dev/null +++ b/docs/cassandraSink.md @@ -0,0 +1,63 @@ +## 1.格式: +``` +CREATE TABLE tableName( + colName colType, 
+ ... + colNameX colType + )WITH( + type ='cassandra', + address ='ip:port[,ip:port]', + userName ='userName', + password ='pwd', + database ='databaseName', + tableName ='tableName', + parallelism ='parllNum' + ); + +``` + +## 2.支持版本 + cassandra-3.6.x + +## 3.表结构定义 + +|参数名称|含义| +|----|---| +| tableName| 在 sql 中使用的名称;即注册到flink-table-env上的名称| +| colName | 列名称| +| colType | 列类型 [colType支持的类型](colType.md)| + +## 4.参数: + +|参数名称|含义|是否必填|默认值| +|----|----|----|----| +|type |表明 输出表类型 cassandra|是|| +|address | 连接cassandra数据库的地址(ip:port,多个之间用逗号隔开)|是|| +|userName | cassandra连接用户名|否|| +|password | cassandra连接密码|否|| +|tableName | cassandra表名称|是|| +|database | cassandra数据库名称|是|| +|parallelism | 并行度设置|否|1| +|maxRequestsPerConnection | 每个连接最多允许64个并发请求|否|NONE| +|coreConnectionsPerHost | 和Cassandra集群里的每个机器都至少有2个连接|否|NONE| +|maxConnectionsPerHost | 和Cassandra集群里的每个机器都最多有6个连接|否|NONE| +|maxQueueSize | Cassandra队列大小|否|NONE| +|readTimeoutMillis | Cassandra读超时|否|NONE| +|connectTimeoutMillis | Cassandra连接超时|否|NONE| +|poolTimeoutMillis | Cassandra线程池超时|否|NONE| + +## 5.样例: +``` +CREATE TABLE MyResult( + channel VARCHAR, + pv VARCHAR + )WITH( + type ='cassandra', + address ='172.21.32.1:9042,172.21.32.1:9042', + userName ='dtstack', + password ='abc123', + database ='test', + tableName ='pv', + parallelism ='1' + ) + ``` \ No newline at end of file diff --git a/docs/consoleSink.md b/docs/consoleSink.md new file mode 100644 index 000000000..206d7faaa --- /dev/null +++ b/docs/consoleSink.md @@ -0,0 +1,50 @@ +## 1.格式: +``` +CREATE TABLE tableName( + colName colType, + ... 
+ colNameX colType + )WITH( + type ='console', + parallelism ='parllNum' + ); + +``` + +## 2.支持版本 +没有限制 + +## 3.表结构定义 + +|参数名称|含义| +|----|---| +| tableName| 在 sql 中使用的名称;即注册到flink-table-env上的名称| +| colName | 列名称| +| colType | 列类型 [colType支持的类型](colType.md)| + +## 4.参数: + +|参数名称|含义|是否必填|默认值| +|----|----|----|----| +|type |表明 输出表类型[console]|是|| +| parallelism | 并行度设置|否|1| + +## 5.样例: +``` +CREATE TABLE MyResult( + name VARCHAR, + channel VARCHAR + )WITH( + type ='console', + parallelism ='1' + ) + ``` + + ## 6.输出结果: + ``` + +------+---------+ + | name | channel | + +------+---------+ + | aa | 02 | + +------+---------+ + ``` \ No newline at end of file diff --git a/docs/kafkaSource.md b/docs/kafkaSource.md index f382ba9ad..315aa9820 100644 --- a/docs/kafkaSource.md +++ b/docs/kafkaSource.md @@ -1,3 +1,4 @@ +# 一、json格式数据源 ## 1.格式: ``` 数据现在支持json格式{"xx":"bb","cc":"dd"} @@ -9,16 +10,20 @@ CREATE TABLE tableName( WATERMARK FOR colName AS withOffset( colName , delayTime ) )WITH( type ='kafka09', - bootstrapServers ='ip:port,ip:port...', - zookeeperQuorum ='ip:port,ip:port/zkparent', - offsetReset ='latest', - topic ='topicName', - parallelism ='parllNum' + kafka.bootstrap.servers ='ip:port,ip:port...', + kafka.zookeeper.quorum ='ip:port,ip:port/zkparent', + kafka.auto.offset.reset ='latest', + kafka.topic ='topicName', + parallelism ='parllNum', + --timezone='America/Los_Angeles', + timezone='Asia/Shanghai', + sourcedatatype ='json' #可不设置 ); ``` ## 2.支持的版本 - kafka09,kafka10,kafka11 + kafka08,kafka09,kafka10,kafka11 + **kafka读取和写入的版本必须一致,否则会有兼容性错误。** ## 3.表结构定义 @@ -35,12 +40,17 @@ CREATE TABLE tableName( |参数名称|含义|是否必填|默认值| |----|---|---|---| |type | kafka09 | 是|| -|bootstrapServers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| -|zookeeperQuorum | kafka zk地址信息(多个之间用逗号分隔)|是|| -|topic | 需要读取的 topic 名称|是|| -|offsetReset | 读取的topic 的offset初始位置[latest\|earliest\|指定offset值({"0":12312,"1":12321,"2":12312},{"partition_no":offset_value})]|否|latest| +|kafka.group.id | 需要读取的 groupId 
名称|否|| +|kafka.bootstrap.servers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| +|kafka.zookeeper.quorum | kafka zk地址信息(多个之间用逗号分隔)|是|| +|kafka.topic | 需要读取的 topic 名称|是|| +|patterntopic | topic是否是正则表达式格式(true\|false) |否| false| +|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest\|earliest\|指定offset值({"0":12312,"1":12321,"2":12312},{"partition_no":offset_value})]|否|latest| |parallelism | 并行度设置|否|1| - +|sourcedatatype | 数据类型|否|json| +|timezone|时区设置[timezone支持的参数](timeZone.md)|否|'Asia/Shanghai'| +**kafka相关参数可以自定义,使用kafka.开头即可。** + ## 5.样例: ``` CREATE TABLE MyTable( @@ -51,10 +61,230 @@ CREATE TABLE MyTable( CHARACTER_LENGTH(channel) AS timeLeng )WITH( type ='kafka09', - bootstrapServers ='172.16.8.198:9092', - zookeeperQuorum ='172.16.8.198:2181/kafka', - offsetReset ='latest', - topic ='nbTest1', - parallelism ='1' + kafka.bootstrap.servers ='172.16.8.198:9092', + kafka.zookeeper.quorum ='172.16.8.198:2181/kafka', + kafka.auto.offset.reset ='latest', + kafka.topic ='nbTest1,nbTest2,nbTest3', + --kafka.topic ='mqTest.*', + --patterntopic='true' + parallelism ='1', + sourcedatatype ='json' #可不设置 ); ``` +# 二、csv格式数据源 +根据字段分隔符进行数据分隔,按顺序匹配sql中配置的列。如数据分隔列数和sql中配置的列数相等直接匹配;如不同参照lengthcheckpolicy策略处理。 +## 1.参数: + +|参数名称|含义|是否必填|默认值| +|----|---|---|---| +|type | kafka09 | 是|| +|kafka.bootstrap.servers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| +|kafka.zookeeper.quorum | kafka zk地址信息(多个之间用逗号分隔)|是|| +|kafka.topic | 需要读取的 topic 名称|是|| +|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest\|earliest]|否|latest| +|parallelism | 并行度设置 |否|1| +|sourcedatatype | 数据类型|是 |csv| +|fielddelimiter | 字段分隔符|是 || +|lengthcheckpolicy | 单行字段条数检查策略 |否|可选,默认为SKIP,其它可选值为EXCEPTION、PAD。SKIP:字段数目不符合时跳过 。EXCEPTION:字段数目不符合时抛出异常。PAD:按顺序填充,不存在的置为null。| +**kafka相关参数可以自定义,使用kafka.开头即可。** + +## 2.样例: +``` +CREATE TABLE MyTable( + name varchar, + channel varchar, + pv INT, + xctime bigint, + CHARACTER_LENGTH(channel) AS timeLeng + )WITH( + type ='kafka09', + kafka.bootstrap.servers ='172.16.8.198:9092', + 
kafka.zookeeper.quorum ='172.16.8.198:2181/kafka', + kafka.auto.offset.reset ='latest', + kafka.topic ='nbTest1', + --kafka.topic ='mqTest.*', + --kafka.topicIsPattern='true' + parallelism ='1', + sourcedatatype ='csv', + fielddelimiter ='\|', + lengthcheckpolicy = 'PAD' + ); + ``` +# 三、text格式数据源UDF自定义拆分 +Kafka源表数据解析流程:Kafka Source Table -> UDTF ->Realtime Compute -> SINK。从Kafka读入的数据,都是VARBINARY(二进制)格式,对读入的每条数据,都需要用UDTF将其解析成格式化数据。 + 与其他格式不同,本格式定义DDL必须与以下SQL一模一样,表中的五个字段顺序务必保持一致: + +## 1. 定义源表,注意:kafka源表DDL字段必须与以下例子一模一样。WITH中参数可改。 +``` +create table kafka_stream( + _topic STRING, + _messageKey STRING, + _message STRING, + _partition INT, + _offset BIGINT, +) with ( + type ='kafka09', + kafka.bootstrap.servers ='172.16.8.198:9092', + kafka.zookeeper.quorum ='172.16.8.198:2181/kafka', + kafka.auto.offset.reset ='latest', + kafka.topic ='nbTest1', + parallelism ='1', + sourcedatatype='text' + ) +``` +## 2.参数: + +|参数名称|含义|是否必填|默认值| +|----|---|---|---| +|type | kafka09 | 是|| +|kafka.bootstrap.servers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| +|kafka.zookeeper.quorum | kafka zk地址信息(多个之间用逗号分隔)|是|| +|kafka.topic | 需要读取的 topic 名称|是|| +|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest\|earliest]|否|latest| +|parallelism | 并行度设置|否|1| +|sourcedatatype | 数据类型|否|text| +**kafka相关参数可以自定义,使用kafka.开头即可。** + +## 3.自定义: +从kafka读出的数据,需要进行窗口计算。 按照实时计算目前的设计,滚窗/滑窗等窗口操作,需要(且必须)在源表DDL上定义Watermark。Kafka源表比较特殊。如果要以kafka中message字段中的Event Time进行窗口操作, +需要先从message字段,使用UDX解析出event time,才能定义watermark。 在kafka源表场景中,需要使用计算列。 假设,kafka中写入的数据如下: +`2018-11-11 00:00:00|1|Anna|female`。整个计算流程为:Kafka SOURCE->UDTF->Realtime Compute->RDS SINK(单一分隔符可直接使用类csv格式模板,自定义适用于更复杂的数据类型,本说明只做参考) + +**SQL** +``` +-- 定义解析Kafka message的UDTF + CREATE FUNCTION kafkapaser AS 'com.XXXX.kafkaUDTF'; + CREATE FUNCTION kafkaUDF AS 'com.XXXX.kafkaUDF'; + -- 定义源表,注意:kafka源表DDL字段必须与以下例子一模一样。WITH中参数可改。 + create table kafka_src ( + _topic STRING, + _messageKey STRING, + _message STRING, + _partition INT, + _offset BIGINT, + ctime AS 
TO_TIMESTAMP(kafkaUDF(_message)), -- 定义计算列,计算列可理解为占位符,源表中并没有这一列,其中的数据可经过下游计算得出。注意计算里的类型必须为timestamp才能在做watermark。 + watermark for ctime as withoffset(ctime,0) -- 在计算列上定义watermark + ) WITH ( + type = 'kafka010', -- Kafka Source类型,与Kafka版本强相关,目前支持的Kafka版本请参考本文档 + topic = 'test_kafka_topic', + ... + ); + create table rds_sink ( + name VARCHAR, + age INT, + grade VARCHAR, + updateTime TIMESTAMP + ) WITH( + type='mysql', + url='jdbc:mysql://localhost:3306/test', + tableName='test4', + userName='test', + password='XXXXXX' + ); + -- 使用UDTF,将二进制数据解析成格式化数据 + CREATE VIEW input_view ( + name, + age, + grade, + updateTime + ) AS + SELECT + COUNT(*) as cnt, + T.ctime, + T.order, + T.name, + T.sex + from + kafka_src as S, + LATERAL TABLE (kafkapaser _message)) as T ( + ctime, + order, + name, + sex + ) + Group BY T.sex, + TUMBLE(ROWTIME, INTERVAL '1' MINUTE); + -- 对input_view中输出的数据做计算 + CREATE VIEW view2 ( + cnt, + sex + ) AS + SELECT + COUNT(*) as cnt, + T.sex + from + input_view + Group BY sex, TUMBLE(ROWTIME, INTERVAL '1' MINUTE); + -- 使用解析出的格式化数据进行计算,并将结果输出到RDS中 + insert into rds_sink + SELECT + cnt,sex + from view2; + ``` +**UDF&UDTF** +``` +package com.XXXX; + import com.XXXX.fastjson.JSONObject; + import org.apache.flink.table.functions.TableFunction; + import org.apache.flink.table.types.DataType; + import org.apache.flink.table.types.DataTypes; + import org.apache.flink.types.Row; + import java.io.UnsupportedEncodingException; + /** + 以下例子解析输入Kafka中的JSON字符串,并将其格式化输出 + **/ + public class kafkaUDTF extends TableFunction { + public void eval(byte[] message) { + try { + // 读入一个二进制数据,并将其转换为String格式 + String msg = new String(message, "UTF-8"); + // 提取JSON Object中各字段 + String ctime = Timestamp.valueOf(data.split('\\|')[0]); + String order = data.split('\\|')[1]; + String name = data.split('\\|')[2]; + String sex = data.split('\\|')[3]; + // 将解析出的字段放到要输出的Row()对象 + Row row = new Row(4); + row.setField(0, ctime); + row.setField(1, age); + row.setField(2, grade); + row.setField(3, 
updateTime); + System.out.println("Kafka message str ==>" + row.toString()); + // 输出一行 + collect(row); + } catch (ClassCastException e) { + System.out.println("Input data format error. Input data " + msg + "is not json string"); + } + } catch (UnsupportedEncodingException e) { + e.printStackTrace(); + } + } + @Override + // 如果返回值是Row,就必须重载实现这个方法,显式地告诉系统返回的字段类型 + // 定义输出Row()对象的字段类型 + public DataType getResultType(Object[] arguments, Class[] argTypes) { + return DataTypes.createRowType(DataTypes.TIMESTAMP,DataTypes.STRING, DataTypes.Integer, DataTypes.STRING,DataTypes.STRING); + } + } + + package com.dp58; + package com.dp58.sql.udx; + import org.apache.flink.table.functions.FunctionContext; + import org.apache.flink.table.functions.ScalarFunction; + public class KafkaUDF extends ScalarFunction { + // 可选,open方法可以不写 + // 需要import org.apache.flink.table.functions.FunctionContext; + public String eval(byte[] message) { + // 读入一个二进制数据,并将其转换为String格式 + String msg = new String(message, "UTF-8"); + return msg.split('\\|')[0]; + } + public long eval(String b, String c) { + return eval(b) + eval(c); + } + //可选,close方法可以不写 + @Override + public void close() { + } + } + ``` diff --git a/docs/mysqlSide.md b/docs/mysqlSide.md index d0fec5832..b17c72bb6 100644 --- a/docs/mysqlSide.md +++ b/docs/mysqlSide.md @@ -52,7 +52,9 @@ * LRU: * cacheSize: 缓存的条目数量 * cacheTTLMs:缓存的过期时间(ms) - + * cacheMode: (unordered|ordered)异步加载是有序还是无序,默认有序。 + * asyncCapacity:异步请求容量,默认1000 + * asyncTimeout:异步请求超时时间,默认10000毫秒 ## 5.样例 ``` @@ -70,6 +72,9 @@ create table sideTable( cache ='LRU', cacheSize ='10000', cacheTTLMs ='60000', + cacheMode='unordered', + asyncCapacity='1000', + asyncTimeout='10000' parallelism ='1', partitionedJoin='false' ); diff --git a/docs/serverSocketSource.md b/docs/serverSocketSource.md new file mode 100644 index 000000000..cffdc8dd2 --- /dev/null +++ b/docs/serverSocketSource.md @@ -0,0 +1,73 @@ + +## 1.数据格式: +``` +数据现在只支持json格式 {"xx":"bb","cc":"dd"} + +CREATE TABLE MyTable( + 
channel varchar, + pv int, + xctime date, + xtime date + + )WITH( + type='serversocket', + host='127.0.0.1', + port='8888', + delimiter=';', + maxNumRetries='100' + ); +``` + + +## 2.参数: + +|参数名称|含义|是否必填|默认值| +|----|---|---|---| +|type | serversocket | 是|| +|host | server host|是|| +|port | server port|是|| +|delimiter| 每条json数据的分割符(比如:;)|是|| +|maxNumRetries| 最大重连次数 (大于0)|是|| + + +## 3.Server端样例: +``` +String str = "{\"CHANNEL\":\"xc3\",\"pv\":1234567,\"xdate\":\"2018-12-07\",\"xtime\":\"2018-12-15\"};"; + + +public class TimeServerHandler implements Runnable { + Socket socket; + + String str = "{\"CHANNEL\":\"xc3\",\"pv\":1234567,\"xdate\":\"2018-12-07\",\"xtime\":\"2018-12-15\"};"; + + public TimeServerHandler(Socket socket) { + this.socket = socket; + } + + public void run() { + PrintWriter out = null; + try { + out = new PrintWriter(this.socket.getOutputStream(), true); + while (true) { + Thread.sleep(3000); + out.println(str); + } + } catch (IOException e) { + e.printStackTrace(); + + if (out != null) { + out.close(); + } + if (socket != null) { + try { + socket.close(); + } catch (IOException e1) { + e1.printStackTrace(); + } + } + } catch (InterruptedException e) { + e.printStackTrace(); + } + } +} +``` diff --git a/docs/timeZone.md b/docs/timeZone.md new file mode 100644 index 000000000..ec6aa0ca3 --- /dev/null +++ b/docs/timeZone.md @@ -0,0 +1,601 @@ +* Africa/Abidjan +* Africa/Accra +* Africa/Addis_Ababa +* Africa/Algiers +* Africa/Asmara +* Africa/Asmera +* Africa/Bamako +* Africa/Bangui +* Africa/Banjul +* Africa/Bissau +* Africa/Blantyre +* Africa/Brazzaville +* Africa/Bujumbura +* Africa/Cairo +* Africa/Casablanca +* Africa/Ceuta +* Africa/Conakry +* Africa/Dakar +* Africa/Dar_es_Salaam +* Africa/Djibouti +* Africa/Douala +* Africa/El_Aaiun +* Africa/Freetown +* Africa/Gaborone +* Africa/Harare +* Africa/Johannesburg +* Africa/Juba +* Africa/Kampala +* Africa/Khartoum +* Africa/Kigali +* Africa/Kinshasa +* Africa/Lagos +* Africa/Libreville +* 
Africa/Lome +* Africa/Luanda +* Africa/Lubumbashi +* Africa/Lusaka +* Africa/Malabo +* Africa/Maputo +* Africa/Maseru +* Africa/Mbabane +* Africa/Mogadishu +* Africa/Monrovia +* Africa/Nairobi +* Africa/Ndjamena +* Africa/Niamey +* Africa/Nouakchott +* Africa/Ouagadougou +* Africa/Porto-Novo +* Africa/Sao_Tome +* Africa/Timbuktu +* Africa/Tripoli +* Africa/Tunis +* Africa/Windhoek +* America/Adak +* America/Anchorage +* America/Anguilla +* America/Antigua +* America/Araguaina +* America/Argentina/Buenos_Aires +* America/Argentina/Catamarca +* America/Argentina/ComodRivadavia +* America/Argentina/Cordoba +* America/Argentina/Jujuy +* America/Argentina/La_Rioja +* America/Argentina/Mendoza +* America/Argentina/Rio_Gallegos +* America/Argentina/Salta +* America/Argentina/San_Juan +* America/Argentina/San_Luis +* America/Argentina/Tucuman +* America/Argentina/Ushuaia +* America/Aruba +* America/Asuncion +* America/Atikokan +* America/Atka +* America/Bahia +* America/Bahia_Banderas +* America/Barbados +* America/Belem +* America/Belize +* America/Blanc-Sablon +* America/Boa_Vista +* America/Bogota +* America/Boise +* America/Buenos_Aires +* America/Cambridge_Bay +* America/Campo_Grande +* America/Cancun +* America/Caracas +* America/Catamarca +* America/Cayenne +* America/Cayman +* America/Chicago +* America/Chihuahua +* America/Coral_Harbour +* America/Cordoba +* America/Costa_Rica +* America/Creston +* America/Cuiaba +* America/Curacao +* America/Danmarkshavn +* America/Dawson +* America/Dawson_Creek +* America/Denver +* America/Detroit +* America/Dominica +* America/Edmonton +* America/Eirunepe +* America/El_Salvador +* America/Ensenada +* America/Fort_Nelson +* America/Fort_Wayne +* America/Fortaleza +* America/Glace_Bay +* America/Godthab +* America/Goose_Bay +* America/Grand_Turk +* America/Grenada +* America/Guadeloupe +* America/Guatemala +* America/Guayaquil +* America/Guyana +* America/Halifax +* America/Havana +* America/Hermosillo +* 
America/Indiana/Indianapolis +* America/Indiana/Knox +* America/Indiana/Marengo +* America/Indiana/Petersburg +* America/Indiana/Tell_City +* America/Indiana/Vevay +* America/Indiana/Vincennes +* America/Indiana/Winamac +* America/Indianapolis +* America/Inuvik +* America/Iqaluit +* America/Jamaica +* America/Jujuy +* America/Juneau +* America/Kentucky/Louisville +* America/Kentucky/Monticello +* America/Knox_IN +* America/Kralendijk +* America/La_Paz +* America/Lima +* America/Los_Angeles +* America/Louisville +* America/Lower_Princes +* America/Maceio +* America/Managua +* America/Manaus +* America/Marigot +* America/Martinique +* America/Matamoros +* America/Mazatlan +* America/Mendoza +* America/Menominee +* America/Merida +* America/Metlakatla +* America/Mexico_City +* America/Miquelon +* America/Moncton +* America/Monterrey +* America/Montevideo +* America/Montreal +* America/Montserrat +* America/Nassau +* America/New_York +* America/Nipigon +* America/Nome +* America/Noronha +* America/North_Dakota/Beulah +* America/North_Dakota/Center +* America/North_Dakota/New_Salem +* America/Ojinaga +* America/Panama +* America/Pangnirtung +* America/Paramaribo +* America/Phoenix +* America/Port-au-Prince +* America/Port_of_Spain +* America/Porto_Acre +* America/Porto_Velho +* America/Puerto_Rico +* America/Punta_Arenas +* America/Rainy_River +* America/Rankin_Inlet +* America/Recife +* America/Regina +* America/Resolute +* America/Rio_Branco +* America/Rosario +* America/Santa_Isabel +* America/Santarem +* America/Santiago +* America/Santo_Domingo +* America/Sao_Paulo +* America/Scoresbysund +* America/Shiprock +* America/Sitka +* America/St_Barthelemy +* America/St_Johns +* America/St_Kitts +* America/St_Lucia +* America/St_Thomas +* America/St_Vincent +* America/Swift_Current +* America/Tegucigalpa +* America/Thule +* America/Thunder_Bay +* America/Tijuana +* America/Toronto +* America/Tortola +* America/Vancouver +* America/Virgin +* America/Whitehorse +* 
America/Winnipeg +* America/Yakutat +* America/Yellowknife +* Antarctica/Casey +* Antarctica/Davis +* Antarctica/DumontDUrville +* Antarctica/Macquarie +* Antarctica/Mawson +* Antarctica/McMurdo +* Antarctica/Palmer +* Antarctica/Rothera +* Antarctica/South_Pole +* Antarctica/Syowa +* Antarctica/Troll +* Antarctica/Vostok +* Arctic/Longyearbyen +* Asia/Aden +* Asia/Almaty +* Asia/Amman +* Asia/Anadyr +* Asia/Aqtau +* Asia/Aqtobe +* Asia/Ashgabat +* Asia/Ashkhabad +* Asia/Atyrau +* Asia/Baghdad +* Asia/Bahrain +* Asia/Baku +* Asia/Bangkok +* Asia/Barnaul +* Asia/Beirut +* Asia/Bishkek +* Asia/Brunei +* Asia/Calcutta +* Asia/Chita +* Asia/Choibalsan +* Asia/Chongqing +* Asia/Chungking +* Asia/Colombo +* Asia/Dacca +* Asia/Damascus +* Asia/Dhaka +* Asia/Dili +* Asia/Dubai +* Asia/Dushanbe +* Asia/Famagusta +* Asia/Gaza +* Asia/Harbin +* Asia/Hebron +* Asia/Ho_Chi_Minh +* Asia/Hong_Kong +* Asia/Hovd +* Asia/Irkutsk +* Asia/Istanbul +* Asia/Jakarta +* Asia/Jayapura +* Asia/Jerusalem +* Asia/Kabul +* Asia/Kamchatka +* Asia/Karachi +* Asia/Kashgar +* Asia/Kathmandu +* Asia/Katmandu +* Asia/Khandyga +* Asia/Kolkata +* Asia/Krasnoyarsk +* Asia/Kuala_Lumpur +* Asia/Kuching +* Asia/Kuwait +* Asia/Macao +* Asia/Macau +* Asia/Magadan +* Asia/Makassar +* Asia/Manila +* Asia/Muscat +* Asia/Nicosia +* Asia/Novokuznetsk +* Asia/Novosibirsk +* Asia/Omsk +* Asia/Oral +* Asia/Phnom_Penh +* Asia/Pontianak +* Asia/Pyongyang +* Asia/Qatar +* Asia/Qyzylorda +* Asia/Rangoon +* Asia/Riyadh +* Asia/Saigon +* Asia/Sakhalin +* Asia/Samarkand +* Asia/Seoul +* Asia/Shanghai +* Asia/Singapore +* Asia/Srednekolymsk +* Asia/Taipei +* Asia/Tashkent +* Asia/Tbilisi +* Asia/Tehran +* Asia/Tel_Aviv +* Asia/Thimbu +* Asia/Thimphu +* Asia/Tokyo +* Asia/Tomsk +* Asia/Ujung_Pandang +* Asia/Ulaanbaatar +* Asia/Ulan_Bator +* Asia/Urumqi +* Asia/Ust-Nera +* Asia/Vientiane +* Asia/Vladivostok +* Asia/Yakutsk +* Asia/Yangon +* Asia/Yekaterinburg +* Asia/Yerevan +* Atlantic/Azores +* Atlantic/Bermuda +* 
Atlantic/Canary +* Atlantic/Cape_Verde +* Atlantic/Faeroe +* Atlantic/Faroe +* Atlantic/Jan_Mayen +* Atlantic/Madeira +* Atlantic/Reykjavik +* Atlantic/South_Georgia +* Atlantic/St_Helena +* Atlantic/Stanley +* Australia/ACT +* Australia/Adelaide +* Australia/Brisbane +* Australia/Broken_Hill +* Australia/Canberra +* Australia/Currie +* Australia/Darwin +* Australia/Eucla +* Australia/Hobart +* Australia/LHI +* Australia/Lindeman +* Australia/Lord_Howe +* Australia/Melbourne +* Australia/NSW +* Australia/North +* Australia/Perth +* Australia/Queensland +* Australia/South +* Australia/Sydney +* Australia/Tasmania +* Australia/Victoria +* Australia/West +* Australia/Yancowinna +* Brazil/Acre +* Brazil/DeNoronha +* Brazil/East +* Brazil/West +* CET +* CST6CDT +* Canada/Atlantic +* Canada/Central +* Canada/Eastern +* Canada/Mountain +* Canada/Newfoundland +* Canada/Pacific +* Canada/Saskatchewan +* Canada/Yukon +* Chile/Continental +* Chile/EasterIsland +* Cuba +* EET +* EST5EDT +* Egypt +* Eire +* Etc/GMT +* Etc/GMT+0 +* Etc/GMT+1 +* Etc/GMT+10 +* Etc/GMT+11 +* Etc/GMT+12 +* Etc/GMT+2 +* Etc/GMT+3 +* Etc/GMT+4 +* Etc/GMT+5 +* Etc/GMT+6 +* Etc/GMT+7 +* Etc/GMT+8 +* Etc/GMT+9 +* Etc/GMT-0 +* Etc/GMT-1 +* Etc/GMT-10 +* Etc/GMT-11 +* Etc/GMT-12 +* Etc/GMT-13 +* Etc/GMT-14 +* Etc/GMT-2 +* Etc/GMT-3 +* Etc/GMT-4 +* Etc/GMT-5 +* Etc/GMT-6 +* Etc/GMT-7 +* Etc/GMT-8 +* Etc/GMT-9 +* Etc/GMT0 +* Etc/Greenwich +* Etc/UCT +* Etc/UTC +* Etc/Universal +* Etc/Zulu +* Europe/Amsterdam +* Europe/Andorra +* Europe/Astrakhan +* Europe/Athens +* Europe/Belfast +* Europe/Belgrade +* Europe/Berlin +* Europe/Bratislava +* Europe/Brussels +* Europe/Bucharest +* Europe/Budapest +* Europe/Busingen +* Europe/Chisinau +* Europe/Copenhagen +* Europe/Dublin +* Europe/Gibraltar +* Europe/Guernsey +* Europe/Helsinki +* Europe/Isle_of_Man +* Europe/Istanbul +* Europe/Jersey +* Europe/Kaliningrad +* Europe/Kiev +* Europe/Kirov +* Europe/Lisbon +* Europe/Ljubljana +* Europe/London +* Europe/Luxembourg 
+* Europe/Madrid +* Europe/Malta +* Europe/Mariehamn +* Europe/Minsk +* Europe/Monaco +* Europe/Moscow +* Europe/Nicosia +* Europe/Oslo +* Europe/Paris +* Europe/Podgorica +* Europe/Prague +* Europe/Riga +* Europe/Rome +* Europe/Samara +* Europe/San_Marino +* Europe/Sarajevo +* Europe/Saratov +* Europe/Simferopol +* Europe/Skopje +* Europe/Sofia +* Europe/Stockholm +* Europe/Tallinn +* Europe/Tirane +* Europe/Tiraspol +* Europe/Ulyanovsk +* Europe/Uzhgorod +* Europe/Vaduz +* Europe/Vatican +* Europe/Vienna +* Europe/Vilnius +* Europe/Volgograd +* Europe/Warsaw +* Europe/Zagreb +* Europe/Zaporozhye +* Europe/Zurich +* GB +* GB-Eire +* GMT +* GMT0 +* Greenwich +* Hongkong +* Iceland +* Indian/Antananarivo +* Indian/Chagos +* Indian/Christmas +* Indian/Cocos +* Indian/Comoro +* Indian/Kerguelen +* Indian/Mahe +* Indian/Maldives +* Indian/Mauritius +* Indian/Mayotte +* Indian/Reunion +* Iran +* Israel +* Jamaica +* Japan +* Kwajalein +* Libya +* MET +* MST7MDT +* Mexico/BajaNorte +* Mexico/BajaSur +* Mexico/General +* NZ +* NZ-CHAT +* Navajo +* PRC +* PST8PDT +* Pacific/Apia +* Pacific/Auckland +* Pacific/Bougainville +* Pacific/Chatham +* Pacific/Chuuk +* Pacific/Easter +* Pacific/Efate +* Pacific/Enderbury +* Pacific/Fakaofo +* Pacific/Fiji +* Pacific/Funafuti +* Pacific/Galapagos +* Pacific/Gambier +* Pacific/Guadalcanal +* Pacific/Guam +* Pacific/Honolulu +* Pacific/Johnston +* Pacific/Kiritimati +* Pacific/Kosrae +* Pacific/Kwajalein +* Pacific/Majuro +* Pacific/Marquesas +* Pacific/Midway +* Pacific/Nauru +* Pacific/Niue +* Pacific/Norfolk +* Pacific/Noumea +* Pacific/Pago_Pago +* Pacific/Palau +* Pacific/Pitcairn +* Pacific/Pohnpei +* Pacific/Ponape +* Pacific/Port_Moresby +* Pacific/Rarotonga +* Pacific/Saipan +* Pacific/Samoa +* Pacific/Tahiti +* Pacific/Tarawa +* Pacific/Tongatapu +* Pacific/Truk +* Pacific/Wake +* Pacific/Wallis +* Pacific/Yap +* Poland +* Portugal +* ROK +* Singapore +* SystemV/AST4 +* SystemV/AST4ADT +* SystemV/CST6 +* SystemV/CST6CDT +* 
SystemV/EST5 +* SystemV/EST5EDT +* SystemV/HST10 +* SystemV/MST7 +* SystemV/MST7MDT +* SystemV/PST8 +* SystemV/PST8PDT +* SystemV/YST9 +* SystemV/YST9YDT +* Turkey +* UCT +* US/Alaska +* US/Aleutian +* US/Arizona +* US/Central +* US/East-Indiana +* US/Eastern +* US/Hawaii +* US/Indiana-Starke +* US/Michigan +* US/Mountain +* US/Pacific +* US/Pacific-New +* US/Samoa +* UTC +* Universal +* W-SU +* WET +* Zulu + + diff --git a/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java b/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java index 29fca4bee..c9adc87b9 100644 --- a/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java +++ b/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java @@ -66,7 +66,7 @@ public HbaseAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List sideInputList = (Map) sideInput; Row row = new Row(sideInfo.getOutFieldInfoList().size()); for(Map.Entry entry : sideInfo.getInFieldIndex().entrySet()){ @@ -170,7 +170,7 @@ private void loadData(Map> tmpCache) throws SQLExcep StringBuilder key = new StringBuilder(); key.append(family).append(":").append(qualifier); - kv.put(aliasNameInversion.get(key.toString().toUpperCase()), value); + kv.put(aliasNameInversion.get(key.toString()), value); } tmpCache.put(new String(r.getRow()), kv); } diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncReqRow.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncReqRow.java index fe227bb6a..6e82e4109 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncReqRow.java +++ b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncReqRow.java @@ -160,7 +160,7 @@ public void asyncInvoke(Row input, ResultFuture 
resultFuture) throws Except } @Override - protected Row fillData(Row input, Object sideInput){ + public Row fillData(Row input, Object sideInput){ List sideInputList = (List) sideInput; Row row = new Row(sideInfo.getOutFieldInfoList().size()); diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java index d077f3493..d79f67ec5 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java +++ b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java @@ -98,9 +98,9 @@ private String dealOneRow(ArrayList> args, String rowKeyStr, String mapKey = cf + ":" + col; //The table format defined using different data type conversion byte - String colType = colRefType.get(mapKey.toUpperCase()); + String colType = colRefType.get(mapKey); Object val = HbaseUtils.convertByte(keyValue.value(), colType); - sideMap.put(mapKey.toUpperCase(), val); + sideMap.put(mapKey, val); } if (oneRow.size() > 0) { diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java index a85f58c4b..4a4b60ff6 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java +++ b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java @@ -73,9 +73,9 @@ public void asyncGetData(String tableName, String rowKeyStr, Row input, ResultFu String col = new String(keyValue.qualifier()); String mapKey = cf + ":" + col; //The table format defined 
using different data type conversion byte - String colType = colRefType.get(mapKey.toUpperCase()); + String colType = colRefType.get(mapKey); Object val = HbaseUtils.convertByte(keyValue.value(), colType); - sideMap.put(mapKey.toUpperCase(), val); + sideMap.put(mapKey, val); } if(arg.size() > 0){ diff --git a/kafka08/kafka08-sink/pom.xml b/kafka08/kafka08-sink/pom.xml new file mode 100644 index 000000000..47391d182 --- /dev/null +++ b/kafka08/kafka08-sink/pom.xml @@ -0,0 +1,94 @@ + + + + sql.kafka08 + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.kafka08 + jar + + kafka08-sink + http://maven.apache.org + + + + org.apache.flink + flink-connector-kafka-0.8_2.11 + ${flink.version} + + + org.apache.flink + flink-json + ${flink.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java new file mode 100644 index 000000000..53febb240 --- /dev/null +++ b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java @@ -0,0 +1,128 @@ +package com.dtstack.flink.sql.sink.kafka; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import 
org.apache.flink.table.shaded.org.apache.commons.lang.StringEscapeUtils; +import org.apache.flink.types.Row; +import org.apache.flink.types.StringValue; + +import java.io.IOException; + +import static org.apache.flink.api.java.typeutils.runtime.NullMaskUtils.writeNullMask; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + */ +@Internal +public final class CustomerCsvSerialization extends TypeSerializerSingleton { + + private static final long serialVersionUID = 1L; + + private String fieldDelimiter = "\u0001"; + private TypeInformation[] fieldTypes; + private TypeSerializer[] fieldSerializers; + private static final Row EMPTY = null; + + public CustomerCsvSerialization(String fielddelimiter,TypeInformation[] fieldTypes) { + this.fieldDelimiter = fielddelimiter; + this.fieldTypes = fieldTypes; + this.fieldSerializers = (TypeSerializer[])createSerializer(new ExecutionConfig()); + } + + public TypeSerializer[] createSerializer(ExecutionConfig config) { + int len = fieldTypes.length; + TypeSerializer[] fieldSerializers = new TypeSerializer[len]; + for (int i = 0; i < len; i++) { + fieldSerializers[i] = fieldTypes[i].createSerializer(config); + } + return fieldSerializers; + } + + @Override + public boolean isImmutableType() { + return true; + } + + @Override + public Row createInstance() { + return EMPTY; + } + + @Override + public Row copy(Row from) { + return null; + } + + @Override + public Row copy(Row from, Row reuse) { + return null; + } + + @Override + public int getLength() { + return -1; + } + + @Override + public void serialize(Row record, DataOutputView target) throws IOException { + int len = fieldSerializers.length; + + if (record.getArity() != len) { + throw new RuntimeException("Row arity of from does not match serializers."); + } + + // write a null mask + writeNullMask(len, record, target); + + // serialize non-null fields + StringBuffer stringBuffer = new StringBuffer(); + for (int i = 0; i < len; 
i++) { + Object o = record.getField(i); + if (o != null) { + //fieldSerializers[i].serialize(o, target); + stringBuffer.append(o); + } + if(i != len-1){ + stringBuffer.append(StringEscapeUtils.unescapeJava(fieldDelimiter)); + //fieldSerializers[i].serialize(fieldDelimiter, target); + } + } + StringValue.writeString(stringBuffer.toString(), target); + } + + @Override + public Row deserialize(DataInputView source) throws IOException { + return null; + } + + @Override + public Row deserialize(Row reuse, DataInputView source) throws IOException { + return null; + } + + @Override + public void copy(DataInputView source, DataOutputView target) throws IOException { + StringValue.copyString(source, target); + } + + @Override + public boolean canEqual(Object obj) { + return obj instanceof CustomerCsvSerialization; + } + + @Override + protected boolean isCompatibleSerializationFormatIdentifier(String identifier) { + return super.isCompatibleSerializationFormatIdentifier(identifier) + || identifier.equals(StringValue.class.getCanonicalName()); + } +} diff --git a/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java new file mode 100644 index 000000000..4aa7f49fa --- /dev/null +++ b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.kafka.table.KafkaSinkTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.formats.json.JsonRowSerializationSchema; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.connectors.kafka.Kafka08TableSink; +import org.apache.flink.streaming.connectors.kafka.KafkaTableSink; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.api.TableSchemaBuilder; +import org.apache.flink.table.sinks.AppendStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +import java.util.Optional; +import java.util.Properties; +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + */ +public class KafkaSink implements AppendStreamTableSink, IStreamSinkGener { + + protected String[] fieldNames; + + protected TypeInformation[] fieldTypes; + + /** The schema of the table. 
*/ + private TableSchema schema; + + /** The Kafka topic to write to. */ + protected String topic; + + /** Properties for the Kafka producer. */ + protected Properties properties; + + /** Serialization schema for encoding records to Kafka. */ + protected SerializationSchema serializationSchema; + + /** Partitioner to select Kafka partition for each item. */ + protected Optional> partitioner; + + @Override + public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { + KafkaSinkTableInfo kafka08SinkTableInfo = (KafkaSinkTableInfo) targetTableInfo; + this.topic = kafka08SinkTableInfo.getKafkaParam("topic"); + + Properties props = new Properties(); + for (String key:kafka08SinkTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka08SinkTableInfo.getKafkaParam(key)); + } + this.properties = props; + this.partitioner = Optional.of(new FlinkFixedPartitioner<>()); + this.fieldNames = kafka08SinkTableInfo.getFields(); + TypeInformation[] types = new TypeInformation[kafka08SinkTableInfo.getFields().length]; + for(int i = 0; i< kafka08SinkTableInfo.getFieldClasses().length; i++){ + types[i] = TypeInformation.of(kafka08SinkTableInfo.getFieldClasses()[i]); + } + this.fieldTypes = types; + + TableSchemaBuilder schemaBuilder = TableSchema.builder(); + for (int i=0;i dataStream) { + KafkaTableSink kafkaTableSink = new Kafka08TableSink( + schema, + topic, + properties, + partitioner, + serializationSchema + ); + + kafkaTableSink.emitDataStream(dataStream); + } + + @Override + public TypeInformation getOutputType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + @Override + public TableSink configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + +} diff --git 
a/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java new file mode 100644 index 000000000..2b6c50512 --- /dev/null +++ b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkParser extends AbsTableParser { + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + KafkaSinkTableInfo kafka08SinkTableInfo = new KafkaSinkTableInfo(); + kafka08SinkTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafka08SinkTableInfo); + kafka08SinkTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSinkTableInfo.PARALLELISM_KEY.toLowerCase()))); + if (props.get(KafkaSinkTableInfo.SINK_DATA_TYPE) != null) { + kafka08SinkTableInfo.setSinkDataType(props.get(KafkaSinkTableInfo.SINK_DATA_TYPE).toString()); + } + if (props.get(KafkaSinkTableInfo.FIELD_DELINITER) != null) { + kafka08SinkTableInfo.setFieldDelimiter(props.get(KafkaSinkTableInfo.FIELD_DELINITER).toString()); + } + + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafka08SinkTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + return kafka08SinkTableInfo; + } +} diff --git a/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java new file mode 100644 index 000000000..5dae21742 --- /dev/null +++ b/kafka08/kafka08-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkTableInfo extends TargetTableInfo { + //version + private static final String CURR_TYPE = "kafka08"; + + public KafkaSinkTableInfo(){ + super.setType(CURR_TYPE); + } + + public Map kafkaParam = new HashMap(); + + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } + + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + // Preconditions.checkNotNull(kafkaParam.get("groupId"), "kafka of groupId is required"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } +} diff --git a/kafka08/kafka08-source/pom.xml b/kafka08/kafka08-source/pom.xml new file mode 100644 index 000000000..64264fd46 --- /dev/null +++ 
b/kafka08/kafka08-source/pom.xml @@ -0,0 +1,91 @@ + + + + sql.kafka08 + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.source.kafka08 + jar + + kafka08-source + http://maven.apache.org + + + + org.apache.flink + flink-connector-kafka-0.8_2.11 + ${flink.version} + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java new file mode 100644 index 000000000..bfbffdf14 --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -0,0 +1,135 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.source.kafka; + +import com.dtstack.flink.sql.source.IStreamSourceGener; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCommonConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCsvConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerJsonConsumer; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; +import com.dtstack.flink.sql.source.kafka.table.KafkaSourceTableInfo; +import com.dtstack.flink.sql.table.SourceTableInfo; +import com.dtstack.flink.sql.util.DtStringUtil; +import com.dtstack.flink.sql.util.PluginUtil; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.java.StreamTableEnvironment; +import org.apache.flink.types.Row; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +public class KafkaSource implements IStreamSourceGener { + + private static final String SOURCE_OPERATOR_NAME_TPL = "${topic}_${table}"; + + /** + * Get kafka data source, you need to provide the data field names, data types + * If you do not specify auto.offset.reset, the default use groupoffset + * + * @param sourceTableInfo + * @return + */ + @SuppressWarnings("rawtypes") + @Override + public Table genStreamSource(SourceTableInfo 
sourceTableInfo, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) { + KafkaSourceTableInfo kafka08SourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; + String topicName = kafka08SourceTableInfo.getKafkaParam("topic"); + String offsetReset = kafka08SourceTableInfo.getKafkaParam("auto.offset.reset"); + Boolean topicIsPattern = kafka08SourceTableInfo.getPatternTopic(); + + Properties props = new Properties(); + for (String key : kafka08SourceTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka08SourceTableInfo.getKafkaParam(key)); + } + + TypeInformation[] types = new TypeInformation[kafka08SourceTableInfo.getFields().length]; + for (int i = 0; i < kafka08SourceTableInfo.getFieldClasses().length; i++) { + types[i] = TypeInformation.of(kafka08SourceTableInfo.getFieldClasses()[i]); + } + + TypeInformation typeInformation = new RowTypeInfo(types, kafka08SourceTableInfo.getFields()); + + FlinkKafkaConsumer08 kafkaSrc; + String fields = StringUtils.join(kafka08SourceTableInfo.getFields(), ","); + + if ("json".equalsIgnoreCase(kafka08SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerJsonConsumer(Pattern.compile(topicName), + new com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization(typeInformation), props); + } else { + kafkaSrc = new CustomerJsonConsumer(topicName, + new CustomerJsonDeserialization(typeInformation), props); + } + } else if ("csv".equalsIgnoreCase(kafka08SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerCsvConsumer(Pattern.compile(topicName), + new com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization(typeInformation, + kafka08SourceTableInfo.getFieldDelimiter(), kafka08SourceTableInfo.getLengthCheckPolicy()), props); + } else { + kafkaSrc = new CustomerCsvConsumer(topicName, + new CustomerCsvDeserialization(typeInformation, + kafka08SourceTableInfo.getFieldDelimiter(), 
kafka08SourceTableInfo.getLengthCheckPolicy()), props); + } + } else { + if (topicIsPattern) { + kafkaSrc = new CustomerCommonConsumer(Pattern.compile(topicName), new com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization(), props); + } else { + kafkaSrc = new CustomerCommonConsumer(topicName, new CustomerCommonDeserialization(), props); + } + } + + //earliest,latest + if ("earliest".equalsIgnoreCase(offsetReset)) { + kafkaSrc.setStartFromEarliest(); + } else if (DtStringUtil.isJosn(offsetReset)) {// {"0":12312,"1":12321,"2":12312} + try { + Properties properties = PluginUtil.jsonStrToObject(offsetReset, Properties.class); + Map offsetMap = PluginUtil.ObjectToMap(properties); + Map specificStartupOffsets = new HashMap<>(); + for (Map.Entry entry : offsetMap.entrySet()) { + specificStartupOffsets.put(new KafkaTopicPartition(topicName, Integer.valueOf(entry.getKey())), Long.valueOf(entry.getValue().toString())); + } + kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); + } catch (Exception e) { + throw new RuntimeException("not support offsetReset type:" + offsetReset); + } + } else { + kafkaSrc.setStartFromLatest(); + } + String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka08SourceTableInfo.getParallelism(); + if (parallelism != null) { + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); + } +} diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java new file mode 100644 index 000000000..34b349e2c --- /dev/null +++ 
b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java @@ -0,0 +1,60 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flink.sql.source.kafka.consumer;
+
+import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization;
+import org.apache.flink.streaming.api.functions.source.SourceFunction;
+import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08;
+import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema;
+import org.apache.flink.types.Row;
+
+import java.util.Properties;
+import java.util.regex.Pattern;
+
+/**
+ * Kafka consumer that keeps a reference to its
+ * {@link CustomerCommonDeserialization} so that the schema can be given the
+ * runtime context before consumption starts.
+ *
+ * Date: 2018/12/18
+ * Company: www.dtstack.com
+ * @author DocLi
+ *
+ * @modifyer maqi
+ */
+public class CustomerCommonConsumer extends FlinkKafkaConsumer08 {
+
+    // Same object that is passed to the superclass, kept under its concrete type.
+    private CustomerCommonDeserialization customerCommonDeserialization;
+
+
+    /**
+     * Creates a consumer for a single topic name.
+     *
+     * @param topic        topic passed unchanged to the superclass
+     * @param deserializer schema; must be a {@link CustomerCommonDeserialization}
+     *                     (it is cast, so any other type fails with a ClassCastException)
+     * @param props        consumer properties passed to the superclass
+     */
+    public CustomerCommonConsumer(String topic, KeyedDeserializationSchema deserializer, Properties props) {
+        super(topic, deserializer, props);
+        this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer;
+    }
+
+    /**
+     * Creates a consumer subscribed by topic pattern.
+     *
+     * @param subscriptionPattern pattern passed unchanged to the superclass
+     * @param deserializer        schema; must be a {@link CustomerCommonDeserialization}
+     * @param props               consumer properties passed to the superclass
+     */
+    public CustomerCommonConsumer(Pattern subscriptionPattern, KeyedDeserializationSchema deserializer, Properties props) {
+        super(subscriptionPattern, deserializer, props);
+        this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer;
+    }
+
+
+    /**
+     * Hands the runtime context to the schema and calls its
+     * {@code initMetric()} before delegating to the superclass.
+     */
+    @Override
+    public void run(SourceFunction.SourceContext sourceContext) throws Exception {
+        customerCommonDeserialization.setRuntimeContext(getRuntimeContext());
+        customerCommonDeserialization.initMetric();
+        super.run(sourceContext);
+    }
+
+}
diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java 
b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java new file mode 100644 index 000000000..7dc95450e --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08; +import org.apache.flink.types.Row; + +import java.util.Arrays; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + */ + +public class CustomerCsvConsumer extends FlinkKafkaConsumer08 { + + private static final long serialVersionUID = -2265366268827807739L; + + private CustomerCsvDeserialization customerCsvDeserialization; + + public CustomerCsvConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); + this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } + + public CustomerCsvConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } + + + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerCsvDeserialization.setRuntimeContext(getRuntimeContext()); + customerCsvDeserialization.initMetric(); + super.run(sourceContext); + } + +} diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java new file mode 100644 index 000000000..b627d81c0 --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java @@ -0,0 +1,64 @@ +/* + * Licensed 
to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08; +import org.apache.flink.types.Row; + +import java.util.Arrays; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * Reason: + * Date: 2018/10/19 + * Company: www.dtstack.com + * + * @author xuchao + */ + +public class CustomerJsonConsumer extends FlinkKafkaConsumer08 { + + private static final long serialVersionUID = -2265366268827807739L; + + private CustomerJsonDeserialization customerJsonDeserialization; + + public CustomerJsonConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + public CustomerJsonConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, 
valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerJsonDeserialization.setRuntimeContext(getRuntimeContext()); + customerJsonDeserialization.initMetric(); + super.run(sourceContext); + } + + +} diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java new file mode 100644 index 000000000..4f0e9ec32 --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.source.kafka.deserialization; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TypeExtractor; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + */ +public class CustomerCommonDeserialization extends AbsDeserialization implements KeyedDeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(CustomerCommonDeserialization.class); + + public static final String[] KAFKA_COLUMNS = new String[]{"_TOPIC", "_MESSAGEKEY", "_MESSAGE", "_PARTITION", "_OFFSET"}; + + private boolean firstMsg = true; + + @Override + public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) { + + //numInRecord.inc(); + //numInBytes.inc(message.length); + //numInBytes.inc(messageKey.length); + + try { + Row row = Row.of( + topic, //topic + messageKey == null ? 
null : new String(messageKey, UTF_8), //key + new String(message, UTF_8), //message + partition, + offset + ); + return row; + } catch (Throwable t) { + LOG.error(t.getMessage()); + // dirtyDataCounter.inc(); + return null; + } + } + + @Override + public Row deserialize(byte[] message) throws IOException { + return null; + } + + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + TypeInformation[] types = new TypeInformation[]{ + TypeExtractor.createTypeInfo(String.class), + TypeExtractor.createTypeInfo(String.class), //createTypeInformation[String] + TypeExtractor.createTypeInfo(String.class), + Types.INT, + Types.LONG + }; + return new RowTypeInfo(types, KAFKA_COLUMNS); + } + +} diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java new file mode 100644 index 000000000..0ca57e0b3 --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java @@ -0,0 +1,120 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flink.sql.source.kafka.deserialization; + + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.types.Row; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + */ + +public class CustomerCsvDeserialization extends AbsDeserialization { + + private static final Logger LOG = LoggerFactory.getLogger(CustomerCsvDeserialization.class); + + private static final long serialVersionUID = -2706012724306826506L; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** Type information describing the result type. */ + private final TypeInformation typeInfo; + + /** Field names to parse. Indices match fieldTypes indices. */ + private final String[] fieldNames; + + /** Types to parse fields as. Indices match fieldNames indices. */ + private final TypeInformation[] fieldTypes; + + /** Flag indicating whether to fail on a missing field. 
*/ + private boolean failOnMissingField; + + private String fieldDelimiter; + + private String lengthCheckPolicy; + + public CustomerCsvDeserialization(TypeInformation typeInfo, String fieldDelimiter, String lengthCheckPolicy){ + this.typeInfo = typeInfo; + + this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames(); + + this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes(); + + this.fieldDelimiter = fieldDelimiter; + + this.lengthCheckPolicy = lengthCheckPolicy; + } + + @Override + public Row deserialize(byte[] message) throws IOException { + + try { + //numInRecord.inc(); + //numInBytes.inc(message.length); + String[] fieldsList = null; + if (message != null && message.length > 0){ + fieldsList = new String(message).split(fieldDelimiter); + } + if (fieldsList == null || fieldsList.length != fieldNames.length){//exception condition + if (lengthCheckPolicy.equalsIgnoreCase("SKIP")) { + return null; + }else if (lengthCheckPolicy.equalsIgnoreCase("EXCEPTION")) { + throw new RuntimeException("lengthCheckPolicy Error,message have "+fieldsList.length+" fields,sql have "+fieldNames.length); + } + } + + Row row = new Row(fieldNames.length); + for (int i = 0; i < fieldNames.length; i++) { + if (i { + + private static final Logger LOG = LoggerFactory.getLogger(CustomerJsonDeserialization.class); + + private static final long serialVersionUID = 2385115520960444192L; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** Type information describing the result type. */ + private final TypeInformation typeInfo; + + /** Field names to parse. Indices match fieldTypes indices. */ + private final String[] fieldNames; + + /** Types to parse fields as. Indices match fieldNames indices. */ + private final TypeInformation[] fieldTypes; + + /** Flag indicating whether to fail on a missing field. 
*/
+    private boolean failOnMissingField;
+
+    // Stored by setFetcher; not read by any member visible in this part of the class.
+    private AbstractFetcher fetcher;
+
+    /**
+     * @param typeInfo row type of the result; it is cast to RowTypeInfo to
+     *                 read the field names and field types
+     */
+    public CustomerJsonDeserialization(TypeInformation typeInfo){
+        this.typeInfo = typeInfo;
+
+        this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames();
+
+        this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes();
+    }
+
+    /**
+     * Parses the message as a JSON tree and fills one row field per
+     * configured field name (names are matched case-insensitively).
+     *
+     * A field missing from the JSON becomes null. Any failure, including
+     * the missing-field exception raised when failOnMissingField is set, is
+     * caught below, logged, and reported as a null row.
+     *
+     * @param message raw record bytes
+     * @return the row, or null if the message could not be converted
+     */
+    @Override
+    public Row deserialize(byte[] message) throws IOException {
+
+        try {
+            // numInRecord.inc();
+            // numInBytes.inc(message.length);
+
+            JsonNode root = objectMapper.readTree(message);
+            Row row = new Row(fieldNames.length);
+            for (int i = 0; i < fieldNames.length; i++) {
+                JsonNode node = getIgnoreCase(root, fieldNames[i]);
+
+                if (node == null) {
+                    if (failOnMissingField) {
+                        throw new IllegalStateException("Failed to find field with name '"
+                                + fieldNames[i] + "'.");
+                    } else {
+                        row.setField(i, null);
+                    }
+                } else {
+                    // Read the value as specified type
+                    Object value = objectMapper.treeToValue(node, fieldTypes[i].getTypeClass());
+                    row.setField(i, value);
+                }
+            }
+
+            // numInResolveRecord.inc();
+            return row;
+        } catch (Throwable t) {
+            //add metric of dirty data
+            // NOTE(review): only the message is logged, the stack trace is lost.
+            LOG.error(t.getMessage());
+            // dirtyDataCounter.inc();
+            return null;
+        }
+    }
+
+    /** Sets the flag checked in deserialize when a configured field is absent. */
+    public void setFailOnMissingField(boolean failOnMissingField) {
+        this.failOnMissingField = failOnMissingField;
+    }
+
+    /**
+     * Looks up a direct child of the node by name, ignoring case.
+     *
+     * @param jsonNode node whose field names are scanned
+     * @param key      wanted field name
+     * @return the first child whose name equals key ignoring case, or null if none matches
+     */
+    public JsonNode getIgnoreCase(JsonNode jsonNode, String key) {
+
+        Iterator iter = jsonNode.fieldNames();
+        while (iter.hasNext()) {
+            String key1 = iter.next();
+            if (key1.equalsIgnoreCase(key)) {
+                return jsonNode.get(key1);
+            }
+        }
+
+        return null;
+
+    }
+
+    /** Stores the fetcher reference. */
+    public void setFetcher(AbstractFetcher fetcher) {
+        this.fetcher = fetcher;
+    }
+
+
+    /** Builds the metric name for a partition: its string form followed by ".records-lag". */
+    private static String partitionLagMetricName(TopicPartition tp) {
+        return tp + ".records-lag";
+    }
+}
diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java
new file mode 100644
index 
000000000..eb085ac78 --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flink.sql.source.kafka.table; + +import com.dtstack.flink.sql.table.AbsSourceParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author xuchao + * + * @modifyer DocLi + */ + +public class KafkaSourceParser extends AbsSourceParser { + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + + KafkaSourceTableInfo kafka08SourceTableInfo = new KafkaSourceTableInfo(); + kafka08SourceTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafka08SourceTableInfo); + kafka08SourceTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSourceTableInfo.PARALLELISM_KEY.toLowerCase()))); + + kafka08SourceTableInfo.setPatternTopic(MathUtil.getBoolean(props.get(KafkaSourceTableInfo.PATTERNTOPIC_KEY.toLowerCase()))); + + 
kafka08SourceTableInfo.setTimeZone(MathUtil.getString(props.get(KafkaSourceTableInfo.TIME_ZONE_KEY.toLowerCase()))); + + if (props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE) != null) { + kafka08SourceTableInfo.setSourceDataType(props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE).toString()); + } + if (props.get(KafkaSourceTableInfo.FIELD_DELINITER) != null) { + kafka08SourceTableInfo.setFieldDelimiter(props.get(KafkaSourceTableInfo.FIELD_DELINITER).toString()); + } + if (props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY) != null) { + kafka08SourceTableInfo.setLengthCheckPolicy(props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY).toString()); + } + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafka08SourceTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + return kafka08SourceTableInfo; + } +} diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java new file mode 100644 index 000000000..9f7e36f56 --- /dev/null +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flink.sql.source.kafka.table; + +import com.dtstack.flink.sql.table.SourceTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + + +public class KafkaSourceTableInfo extends SourceTableInfo { + + //version + private static final String CURR_TYPE = "kafka08"; + + public static final String PATTERNTOPIC_KEY = "patterntopic"; + + private Boolean patternTopic=false; + + public Boolean getPatternTopic() { + return patternTopic; + } + + public void setPatternTopic(Boolean patternTopic) { + if (patternTopic==null){ + return; + } + this.patternTopic = patternTopic; + } + + public KafkaSourceTableInfo(){ + super.setType(CURR_TYPE); + } + + public Map kafkaParam = new HashMap<>(); + + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } + + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + String offset = kafkaParam.get("auto.offset.reset"); + Preconditions.checkState(offset.equalsIgnoreCase("latest") + || offset.equalsIgnoreCase("earliest"), "kafka of offsetReset set fail"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } +} diff --git a/kafka08/pom.xml b/kafka08/pom.xml new file mode 100644 index 000000000..ced039c1c --- /dev/null +++ b/kafka08/pom.xml @@ -0,0 +1,37 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.kafka08 + pom + + + kafka08-source + kafka08-sink + + + + + junit + junit + 3.8.1 + test + 
+ + + com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + + + \ No newline at end of file diff --git a/kafka09/kafka09-sink/pom.xml b/kafka09/kafka09-sink/pom.xml new file mode 100644 index 000000000..5d11d9df8 --- /dev/null +++ b/kafka09/kafka09-sink/pom.xml @@ -0,0 +1,90 @@ + + + + sql.kafka09 + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.kafka09 + jar + + kafka09-sink + http://maven.apache.org + + + + org.apache.flink + flink-json + ${flink.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java new file mode 100644 index 000000000..88a10a293 --- /dev/null +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java @@ -0,0 +1,127 @@ +package com.dtstack.flink.sql.sink.kafka; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.table.shaded.org.apache.commons.lang.StringEscapeUtils; +import org.apache.flink.types.Row; +import org.apache.flink.types.StringValue; + +import java.io.IOException; + +import static org.apache.flink.api.java.typeutils.runtime.NullMaskUtils.writeNullMask; +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * 
@author DocLi + * + * @modifyer maqi + * + */ +@Internal +public final class CustomerCsvSerialization extends TypeSerializerSingleton { + + private static final long serialVersionUID = 1L; + + private String fieldDelimiter = "\u0001"; + private TypeInformation[] fieldTypes; + private TypeSerializer[] fieldSerializers; + private static final Row EMPTY = null; + + public CustomerCsvSerialization(String fielddelimiter,TypeInformation[] fieldTypes) { + this.fieldDelimiter = fielddelimiter; + this.fieldTypes = fieldTypes; + this.fieldSerializers = (TypeSerializer[])createSerializer(new ExecutionConfig()); + } + + public TypeSerializer[] createSerializer(ExecutionConfig config) { + int len = fieldTypes.length; + TypeSerializer[] fieldSerializers = new TypeSerializer[len]; + for (int i = 0; i < len; i++) { + fieldSerializers[i] = fieldTypes[i].createSerializer(config); + } + return fieldSerializers; + } + + @Override + public boolean isImmutableType() { + return true; + } + + @Override + public Row createInstance() { + return EMPTY; + } + + @Override + public Row copy(Row from) { + return null; + } + + @Override + public Row copy(Row from, Row reuse) { + return null; + } + + @Override + public int getLength() { + return -1; + } + + @Override + public void serialize(Row record, DataOutputView target) throws IOException { + int len = fieldSerializers.length; + + if (record.getArity() != len) { + throw new RuntimeException("Row arity of from does not match serializers."); + } + + // write a null mask + writeNullMask(len, record, target); + + // serialize non-null fields + StringBuffer stringBuffer = new StringBuffer(); + for (int i = 0; i < len; i++) { + Object o = record.getField(i); + if (o != null) { + //fieldSerializers[i].serialize(o, target); + stringBuffer.append(o); + } + if(i != len-1){ + stringBuffer.append(StringEscapeUtils.unescapeJava(fieldDelimiter)); + //fieldSerializers[i].serialize(fieldDelimiter, target); + } + } + 
StringValue.writeString(stringBuffer.toString(), target); + } + + @Override + public Row deserialize(DataInputView source) throws IOException { + return null; + } + + @Override + public Row deserialize(Row reuse, DataInputView source) throws IOException { + return null; + } + + @Override + public void copy(DataInputView source, DataOutputView target) throws IOException { + StringValue.copyString(source, target); + } + + @Override + public boolean canEqual(Object obj) { + return obj instanceof CustomerCsvSerialization; + } + + @Override + protected boolean isCompatibleSerializationFormatIdentifier(String identifier) { + return super.isCompatibleSerializationFormatIdentifier(identifier) + || identifier.equals(StringValue.class.getCanonicalName()); + } +} diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java new file mode 100644 index 000000000..106c3ab5c --- /dev/null +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -0,0 +1,140 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.kafka.table.KafkaSinkTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.formats.json.JsonRowSerializationSchema; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.connectors.kafka.Kafka09TableSink; +import org.apache.flink.streaming.connectors.kafka.KafkaTableSink; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.api.TableSchemaBuilder; +import org.apache.flink.table.sinks.AppendStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +import java.util.Optional; +import java.util.Properties; +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSink implements AppendStreamTableSink, IStreamSinkGener { + + protected String[] fieldNames; + + protected TypeInformation[] fieldTypes; + + /** The schema of the table. */ + private TableSchema schema; + + /** The Kafka topic to write to. */ + protected String topic; + + /** Properties for the Kafka producer. */ + protected Properties properties; + + /** Serialization schema for encoding records to Kafka. */ + protected SerializationSchema serializationSchema; + + /** Partitioner to select Kafka partition for each item. 
*/ + protected Optional> partitioner; + + @Override + public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { + KafkaSinkTableInfo kafka09SinkTableInfo = (KafkaSinkTableInfo) targetTableInfo; + this.topic = kafka09SinkTableInfo.getKafkaParam("topic"); + + Properties props = new Properties(); + for (String key:kafka09SinkTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka09SinkTableInfo.getKafkaParam(key)); + } + this.properties = props; + this.partitioner = Optional.of(new FlinkFixedPartitioner<>()); + this.fieldNames = kafka09SinkTableInfo.getFields(); + TypeInformation[] types = new TypeInformation[kafka09SinkTableInfo.getFields().length]; + for(int i = 0; i< kafka09SinkTableInfo.getFieldClasses().length; i++){ + types[i] = TypeInformation.of(kafka09SinkTableInfo.getFieldClasses()[i]); + } + this.fieldTypes = types; + + TableSchemaBuilder schemaBuilder = TableSchema.builder(); + for (int i=0;i dataStream) { + KafkaTableSink kafkaTableSink = new Kafka09TableSink( + schema, + topic, + properties, + partitioner, + serializationSchema + ); + + kafkaTableSink.emitDataStream(dataStream); + } + + @Override + public TypeInformation getOutputType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + @Override + public TableSink configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + +} diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java new file mode 100644 index 000000000..b51b9ea01 --- /dev/null +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache 
Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkParser extends AbsTableParser { + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + KafkaSinkTableInfo kafka09SinkTableInfo = new KafkaSinkTableInfo(); + kafka09SinkTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafka09SinkTableInfo); + kafka09SinkTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSinkTableInfo.PARALLELISM_KEY.toLowerCase()))); + if (props.get(KafkaSinkTableInfo.SINK_DATA_TYPE) != null) { + kafka09SinkTableInfo.setSinkDataType(props.get(KafkaSinkTableInfo.SINK_DATA_TYPE).toString()); + } + if (props.get(KafkaSinkTableInfo.FIELD_DELINITER) != null) { + kafka09SinkTableInfo.setFieldDelimiter(props.get(KafkaSinkTableInfo.FIELD_DELINITER).toString()); + } + + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + 
kafka09SinkTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + return kafka09SinkTableInfo; + } +} diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java new file mode 100644 index 000000000..48daf57dd --- /dev/null +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkTableInfo extends TargetTableInfo { + //version + private static final String CURR_TYPE = "kafka09"; + + public KafkaSinkTableInfo(){ + super.setType(CURR_TYPE); + } + + public Map kafkaParam = new HashMap(); + + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } + + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } +} diff --git a/kafka09/kafka09-source/pom.xml b/kafka09/kafka09-source/pom.xml index 59e05c020..6f26b62a0 100644 --- a/kafka09/kafka09-source/pom.xml +++ b/kafka09/kafka09-source/pom.xml @@ -14,16 +14,6 @@ kafka09-source http://maven.apache.org - - - - org.apache.flink - flink-connector-kafka-0.9_2.11 - ${flink.version} - - - - diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java index cbc697c82..292f70f51 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -21,14 +21,20 @@ package com.dtstack.flink.sql.source.kafka; import 
com.dtstack.flink.sql.source.IStreamSourceGener; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCommonConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCsvConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerJsonConsumer; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; import com.dtstack.flink.sql.source.kafka.table.KafkaSourceTableInfo; import com.dtstack.flink.sql.table.SourceTableInfo; import com.dtstack.flink.sql.util.DtStringUtil; import com.dtstack.flink.sql.util.PluginUtil; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.api.common.functions.RuntimeContext; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; @@ -39,6 +45,7 @@ import java.util.HashMap; import java.util.Map; import java.util.Properties; +import java.util.regex.Pattern; /** * If eventtime field is specified, the default time field rowtime @@ -62,12 +69,15 @@ public class KafkaSource implements IStreamSourceGener

{ public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) { KafkaSourceTableInfo kafka09SourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; - String topicName = kafka09SourceTableInfo.getTopic(); + String topicName = kafka09SourceTableInfo.getKafkaParam("topic"); + String offsetReset = kafka09SourceTableInfo.getKafkaParam("auto.offset.reset"); + Boolean topicIsPattern = kafka09SourceTableInfo.getPatternTopic(); Properties props = new Properties(); - props.setProperty("bootstrap.servers", kafka09SourceTableInfo.getBootstrapServers()); - props.setProperty("auto.offset.reset", kafka09SourceTableInfo.getOffsetReset()); - //TODO props.setProperty("zookeeper.connect", kafka09SourceTableInfo.) + + for (String key:kafka09SourceTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka09SourceTableInfo.getKafkaParam(key)); + } TypeInformation[] types = new TypeInformation[kafka09SourceTableInfo.getFields().length]; for(int i = 0; i< kafka09SourceTableInfo.getFieldClasses().length; i++){ @@ -75,15 +85,43 @@ public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnv } TypeInformation typeInformation = new RowTypeInfo(types, kafka09SourceTableInfo.getFields()); - FlinkKafkaConsumer09 kafkaSrc = new CustomerKafka09Consumer(topicName, - new CustomerJsonDeserialization(typeInformation), props); + + + FlinkKafkaConsumer09 kafkaSrc; + String fields=StringUtils.join(kafka09SourceTableInfo.getFields(), ","); + + if ("json".equalsIgnoreCase(kafka09SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerJsonConsumer(Pattern.compile(topicName), + new com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization(typeInformation), props); + } else { + kafkaSrc = new CustomerJsonConsumer(topicName, + new CustomerJsonDeserialization(typeInformation), props); + } + } else if 
("csv".equalsIgnoreCase(kafka09SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerCsvConsumer(Pattern.compile(topicName), + new CustomerCsvDeserialization(typeInformation, + kafka09SourceTableInfo.getFieldDelimiter(), kafka09SourceTableInfo.getLengthCheckPolicy()), props); + } else { + kafkaSrc = new CustomerCsvConsumer(topicName, + new CustomerCsvDeserialization(typeInformation, + kafka09SourceTableInfo.getFieldDelimiter(), kafka09SourceTableInfo.getLengthCheckPolicy()), props); + } + } else { + if (topicIsPattern) { + kafkaSrc = new CustomerCommonConsumer(Pattern.compile(topicName), new CustomerCommonDeserialization(), props); + } else { + kafkaSrc = new CustomerCommonConsumer(topicName, new CustomerCommonDeserialization(), props); + } + } //earliest,latest - if("earliest".equalsIgnoreCase(kafka09SourceTableInfo.getOffsetReset())){ + if("earliest".equalsIgnoreCase(offsetReset)){ kafkaSrc.setStartFromEarliest(); - }else if(DtStringUtil.isJosn(kafka09SourceTableInfo.getOffsetReset())){// {"0":12312,"1":12321,"2":12312} + }else if(DtStringUtil.isJosn(offsetReset)){// {"0":12312,"1":12321,"2":12312} try { - Properties properties = PluginUtil.jsonStrToObject(kafka09SourceTableInfo.getOffsetReset(), Properties.class); + Properties properties = PluginUtil.jsonStrToObject(offsetReset, Properties.class); Map offsetMap = PluginUtil.ObjectToMap(properties); Map specificStartupOffsets = new HashMap<>(); for(Map.Entry entry:offsetMap.entrySet()){ @@ -91,14 +129,17 @@ public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnv } kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); } catch (Exception e) { - throw new RuntimeException("not support offsetReset type:" + kafka09SourceTableInfo.getOffsetReset()); + throw new RuntimeException("not support offsetReset type:" + offsetReset); } }else { kafkaSrc.setStartFromLatest(); } - - String fields = StringUtils.join(kafka09SourceTableInfo.getFields(), ","); String 
sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); - return tableEnv.fromDataStream(env.addSource(kafkaSrc, sourceOperatorName, typeInformation), fields); + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka09SourceTableInfo.getParallelism(); + if(parallelism != null){ + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); } } diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java new file mode 100644 index 000000000..f6e0eb670 --- /dev/null +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCommonConsumer extends FlinkKafkaConsumer09 { + + private CustomerCommonDeserialization customerCommonDeserialization; + + + public CustomerCommonConsumer(String topic, KeyedDeserializationSchema deserializer, Properties props) { + super(topic, deserializer, props); + this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer; + } + + public CustomerCommonConsumer(Pattern subscriptionPattern, KeyedDeserializationSchema deserializer, Properties 
props) { + super(subscriptionPattern, deserializer, props); + this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer; + } + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerCommonDeserialization.setRuntimeContext(getRuntimeContext()); + customerCommonDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceFunction.SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerCommonDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java new file mode 100644 index 000000000..70fd50b7b --- /dev/null +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Arrays; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ + +public class CustomerCsvConsumer extends FlinkKafkaConsumer09 { + + private static final long serialVersionUID = -2265366268827807739L; + + private CustomerCsvDeserialization customerCsvDeserialization; + + public CustomerCsvConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); + 
this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } + + public CustomerCsvConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerCsvDeserialization.setRuntimeContext(getRuntimeContext()); + customerCsvDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceFunction.SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerCsvDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java new file mode 100644 index 000000000..e3e9e2d8a --- /dev/null +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Arrays; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author xuchao + * + * @modifyer maqi + * + */ + +public class CustomerJsonConsumer extends FlinkKafkaConsumer09 { + + private static final long serialVersionUID = -2265366268827807739L; + + private CustomerJsonDeserialization customerJsonDeserialization; + + public 
CustomerJsonConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + public CustomerJsonConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerJsonDeserialization.setRuntimeContext(getRuntimeContext()); + customerJsonDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceFunction.SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerJsonDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java new file mode 100644 index 000000000..524b0cffb --- /dev/null +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.deserialization; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TypeExtractor; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.*; +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer 
maqi + * + */ +public class CustomerCommonDeserialization extends AbsDeserialization implements KeyedDeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(CustomerCommonDeserialization.class); + + public static final String[] KAFKA_COLUMNS = new String[]{"_TOPIC", "_MESSAGEKEY", "_MESSAGE", "_PARTITION", "_OFFSET"}; + + private AbstractFetcher fetcher; + + private boolean firstMsg = true; + + @Override + public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) { + if (firstMsg) { + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + firstMsg = false; + } + + numInRecord.inc(); + if(message!=null){numInBytes.inc(message.length);} + if(messageKey!=null){numInBytes.inc(messageKey.length);} + + try { + Row row = Row.of( + topic, //topic + messageKey == null ? null : new String(messageKey, UTF_8), //key + new String(message, UTF_8), //message + partition, + offset + ); + return row; + } catch (Throwable t) { + LOG.error(t.getMessage()); + dirtyDataCounter.inc(); + return null; + } + } + + @Override + public Row deserialize(byte[] message) throws IOException { + return null; + } + + public void setFetcher(AbstractFetcher fetcher) { + this.fetcher = fetcher; + } + + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + TypeInformation[] types = new TypeInformation[]{ + TypeExtractor.createTypeInfo(String.class), + TypeExtractor.createTypeInfo(String.class), //createTypeInformation[String] + TypeExtractor.createTypeInfo(String.class), + Types.INT, + Types.LONG + }; + return new RowTypeInfo(types, KAFKA_COLUMNS); + } + + protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + 
KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if (!hasAssignedPartitions) { + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = (SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for (TopicPartition topicPartition : assignedPartitions) { + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } +} diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java new file mode 100644 index 000000000..20829bf61 --- /dev/null +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java @@ -0,0 +1,182 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flink.sql.source.kafka.deserialization; + + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.*; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCsvDeserialization extends AbsDeserialization { + + private static final 
Logger LOG = LoggerFactory.getLogger(CustomerCsvDeserialization.class); + + private static final long serialVersionUID = -2706012724306826506L; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** Type information describing the result type. */ + private final TypeInformation typeInfo; + + /** Field names to parse. Indices match fieldTypes indices. */ + private final String[] fieldNames; + + /** Types to parse fields as. Indices match fieldNames indices. */ + private final TypeInformation[] fieldTypes; + + /** Flag indicating whether to fail on a missing field. */ + private boolean failOnMissingField; + + private AbstractFetcher fetcher; + + private boolean firstMsg = true; + + private String fieldDelimiter; + + private String lengthCheckPolicy; + + public CustomerCsvDeserialization(TypeInformation typeInfo, String fieldDelimiter, String lengthCheckPolicy){ + this.typeInfo = typeInfo; + + this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames(); + + this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes(); + + this.fieldDelimiter = fieldDelimiter; + + this.lengthCheckPolicy = lengthCheckPolicy; + } + + @Override + public Row deserialize(byte[] message) throws IOException { + if(firstMsg){ + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + + firstMsg = false; + } + + try { + numInRecord.inc(); + numInBytes.inc(message.length); + String[] fieldsList = null; + if (message != null && message.length > 0){ + fieldsList = new String(message).split(fieldDelimiter); + } + if (fieldsList == null || fieldsList.length != fieldNames.length){//exception condition + if (lengthCheckPolicy.equalsIgnoreCase("SKIP")) { + return null; + }else if (lengthCheckPolicy.equalsIgnoreCase("EXCEPTION")) { + throw new RuntimeException("lengthCheckPolicy Error,message have "+fieldsList.length+" fields,sql have "+fieldNames.length); + } + } + + Row row = new Row(fieldNames.length); + for (int i = 
0; i < fieldNames.length; i++) { + if (i fetcher) { + this.fetcher = fetcher; + } + + protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if(!hasAssignedPartitions){ + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = (SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for(TopicPartition topicPartition : assignedPartitions){ + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } + + public void setFailOnMissingField(boolean failOnMissingField) { + this.failOnMissingField = failOnMissingField; + } + +} diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java 
b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java similarity index 98% rename from kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java rename to kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java index bcb54159a..24b607ee1 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java @@ -18,7 +18,7 @@ -package com.dtstack.flink.sql.source.kafka; +package com.dtstack.flink.sql.source.kafka.deserialization; import com.dtstack.flink.sql.source.AbsDeserialization; @@ -123,6 +123,7 @@ public Row deserialize(byte[] message) throws IOException { return row; } catch (Throwable t) { //add metric of dirty data + LOG.error(t.getMessage()); dirtyDataCounter.inc(); return null; } diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java index 898083826..9bc4df8e2 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -16,7 +16,7 @@ * limitations under the License. 
*/ - + package com.dtstack.flink.sql.source.kafka.table; @@ -27,25 +27,42 @@ import java.util.Map; /** - * Reason: - * Date: 2018/7/4 + /** + * Date: 2018/12/18 * Company: www.dtstack.com * @author xuchao + * + * @modifyer Docli + * */ public class KafkaSourceParser extends AbsSourceParser { @Override public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { - KafkaSourceTableInfo kafka09SourceTableInfo = new KafkaSourceTableInfo(); kafka09SourceTableInfo.setName(tableName); parseFieldsInfo(fieldsInfo, kafka09SourceTableInfo); - kafka09SourceTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSourceTableInfo.PARALLELISM_KEY.toLowerCase()))); - kafka09SourceTableInfo.setBootstrapServers(MathUtil.getString(props.get(KafkaSourceTableInfo.BOOTSTRAPSERVERS_KEY.toLowerCase()))); - kafka09SourceTableInfo.setGroupId(MathUtil.getString(props.get(KafkaSourceTableInfo.GROUPID_KEY.toLowerCase()))); - kafka09SourceTableInfo.setTopic(MathUtil.getString(props.get(KafkaSourceTableInfo.TOPIC_KEY.toLowerCase()))); + + kafka09SourceTableInfo.setPatternTopic(MathUtil.getBoolean(props.get(KafkaSourceTableInfo.PATTERNTOPIC_KEY.toLowerCase()))); + + kafka09SourceTableInfo.setTimeZone(MathUtil.getString(props.get(KafkaSourceTableInfo.TIME_ZONE_KEY.toLowerCase()))); + + if (props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE) != null) { + kafka09SourceTableInfo.setSourceDataType(props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE).toString()); + } + if (props.get(KafkaSourceTableInfo.FIELD_DELINITER) != null) { + kafka09SourceTableInfo.setFieldDelimiter(props.get(KafkaSourceTableInfo.FIELD_DELINITER).toString()); + } + if (props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY) != null) { + kafka09SourceTableInfo.setLengthCheckPolicy(props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY).toString()); + } + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafka09SourceTableInfo.addKafkaParam(key.substring(6), 
props.get(key).toString()); + } + } return kafka09SourceTableInfo; } } diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index 29239ad75..cb524f538 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -16,105 +16,80 @@ * limitations under the License. */ - package com.dtstack.flink.sql.source.kafka.table; import com.dtstack.flink.sql.table.SourceTableInfo; import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + /** * Reason: * Date: 2018/6/22 * Company: www.dtstack.com + * * @author xuchao */ public class KafkaSourceTableInfo extends SourceTableInfo { - //version - private static final String CURR_TYPE = "kafka09"; - - public static final String BOOTSTRAPSERVERS_KEY = "bootstrapServers"; - - public static final String TOPIC_KEY = "topic"; - - public static final String GROUPID_KEY = "groupId"; - - private String bootstrapServers; - - private String topic; - - private String groupId; - - //latest, earliest - private String offsetReset = "latest"; - - private String offset; - - public KafkaSourceTableInfo(){ - super.setType(CURR_TYPE); - } - + //version + private static final String CURR_TYPE = "kafka09"; - public String getBootstrapServers() { - return bootstrapServers; - } + public static final String PATTERNTOPIC_KEY = "patterntopic"; - public void setBootstrapServers(String bootstrapServers) { - this.bootstrapServers = bootstrapServers; - } + private Boolean patternTopic=false; - public String getTopic() { - return topic; - } + public Boolean getPatternTopic() { + return patternTopic; + } - public void setTopic(String topic) 
{ - this.topic = topic; - } + public void setPatternTopic(Boolean patternTopic) { + if (patternTopic==null){ + return; + } + this.patternTopic = patternTopic; + } - public String getGroupId() { - return groupId; - } + public KafkaSourceTableInfo(){ + super.setType(CURR_TYPE); + } - public void setGroupId(String groupId) { - this.groupId = groupId; - } - public String getOffsetReset() { - return offsetReset; - } + public Map kafkaParam = new HashMap<>(); - public void setOffsetReset(String offsetReset) { - if(offsetReset == null){ - return; - } - this.offsetReset = offsetReset; - } + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } - public String getOffset() { - return offset; - } + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } - public void setOffset(String offset) { - this.offset = offset; - } + public Boolean getKafkaBooleanParam(String key){ + return Boolean.valueOf(kafkaParam.getOrDefault(key,"false").toLowerCase()); + } + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } - @Override - public boolean check() { - Preconditions.checkNotNull(bootstrapServers, "kafka of bootstrapServers is required"); - Preconditions.checkNotNull(topic, "kafka of topic is required"); - Preconditions.checkNotNull(groupId, "kafka of groupId is required"); - Preconditions.checkState(offsetReset.equalsIgnoreCase("latest") - || offsetReset.equalsIgnoreCase("latest"), "kafka of offsetReset set fail"); + @Override + public boolean check() { - return false; - } + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + String offset = kafkaParam.get("auto.offset.reset"); + Preconditions.checkState(offset.equalsIgnoreCase("latest") + || offset.equalsIgnoreCase("earliest"), "kafka of offsetReset set fail"); + return false; + } - @Override - public String getType() { -// return 
super.getType() + SOURCE_SUFFIX; - return super.getType(); - } + @Override + public String getType() { + return super.getType(); + } } diff --git a/kafka09/pom.xml b/kafka09/pom.xml index 457a8d387..1ae761e9b 100644 --- a/kafka09/pom.xml +++ b/kafka09/pom.xml @@ -15,9 +15,17 @@ kafka09-source + kafka09-sink + + + org.apache.flink + flink-connector-kafka-0.9_2.11 + ${flink.version} + + junit junit diff --git a/kafka10/kafka10-sink/pom.xml b/kafka10/kafka10-sink/pom.xml new file mode 100644 index 000000000..3a8b7592e --- /dev/null +++ b/kafka10/kafka10-sink/pom.xml @@ -0,0 +1,89 @@ + + + + sql.kafka10 + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.kafka10 + jar + + kafka10-sink + http://maven.apache.org + + + + org.apache.flink + flink-json + ${flink.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java new file mode 100644 index 000000000..825703332 --- /dev/null +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java @@ -0,0 +1,129 @@ +package com.dtstack.flink.sql.sink.kafka; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import 
org.apache.flink.table.shaded.org.apache.commons.lang.StringEscapeUtils; +import org.apache.flink.types.Row; +import org.apache.flink.types.StringValue; + +import java.io.IOException; + +import static org.apache.flink.api.java.typeutils.runtime.NullMaskUtils.writeNullMask; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +@Internal +public final class CustomerCsvSerialization extends TypeSerializerSingleton { + + private static final long serialVersionUID = 1L; + + private String fieldDelimiter = "\u0001"; + private TypeInformation[] fieldTypes; + private TypeSerializer[] fieldSerializers; + private static final Row EMPTY = null; + + public CustomerCsvSerialization(String fielddelimiter,TypeInformation[] fieldTypes) { + this.fieldDelimiter = fielddelimiter; + this.fieldTypes = fieldTypes; + this.fieldSerializers = (TypeSerializer[])createSerializer(new ExecutionConfig()); + } + + public TypeSerializer[] createSerializer(ExecutionConfig config) { + int len = fieldTypes.length; + TypeSerializer[] fieldSerializers = new TypeSerializer[len]; + for (int i = 0; i < len; i++) { + fieldSerializers[i] = fieldTypes[i].createSerializer(config); + } + return fieldSerializers; + } + + @Override + public boolean isImmutableType() { + return true; + } + + @Override + public Row createInstance() { + return EMPTY; + } + + @Override + public Row copy(Row from) { + return null; + } + + @Override + public Row copy(Row from, Row reuse) { + return null; + } + + @Override + public int getLength() { + return -1; + } + + @Override + public void serialize(Row record, DataOutputView target) throws IOException { + int len = fieldSerializers.length; + + if (record.getArity() != len) { + throw new RuntimeException("Row arity of from does not match serializers."); + } + + // write a null mask + writeNullMask(len, record, target); + + // serialize non-null fields + StringBuffer stringBuffer = new StringBuffer(); + for (int i = 0; i < 
len; i++) { + Object o = record.getField(i); + if (o != null) { + //fieldSerializers[i].serialize(o, target); + stringBuffer.append(o); + } + if(i != len-1){ + stringBuffer.append(StringEscapeUtils.unescapeJava(fieldDelimiter)); + //fieldSerializers[i].serialize(fieldDelimiter, target); + } + } + StringValue.writeString(stringBuffer.toString(), target); + } + + @Override + public Row deserialize(DataInputView source) throws IOException { + return null; + } + + @Override + public Row deserialize(Row reuse, DataInputView source) throws IOException { + return null; + } + + @Override + public void copy(DataInputView source, DataOutputView target) throws IOException { + StringValue.copyString(source, target); + } + + @Override + public boolean canEqual(Object obj) { + return obj instanceof CustomerCsvSerialization; + } + + @Override + protected boolean isCompatibleSerializationFormatIdentifier(String identifier) { + return super.isCompatibleSerializationFormatIdentifier(identifier) + || identifier.equals(StringValue.class.getCanonicalName()); + } +} diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java new file mode 100644 index 000000000..7d3748441 --- /dev/null +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -0,0 +1,142 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.kafka.table.KafkaSinkTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.formats.json.JsonRowSerializationSchema; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.connectors.kafka.Kafka010TableSink; +import org.apache.flink.streaming.connectors.kafka.KafkaTableSink; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.api.TableSchemaBuilder; +import org.apache.flink.table.sinks.AppendStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +import java.util.Optional; +import java.util.Properties; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSink implements AppendStreamTableSink, IStreamSinkGener { + + protected String[] fieldNames; + + protected TypeInformation[] fieldTypes; + + /** The schema of the table. 
*/ + private TableSchema schema; + + /** The Kafka topic to write to. */ + protected String topic; + + /** Properties for the Kafka producer. */ + protected Properties properties; + + /** Serialization schema for encoding records to Kafka. */ + protected SerializationSchema serializationSchema; + + /** Partitioner to select Kafka partition for each item. */ + protected Optional> partitioner; + + @Override + public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { + KafkaSinkTableInfo kafka010SinkTableInfo = (KafkaSinkTableInfo) targetTableInfo; + this.topic = kafka010SinkTableInfo.getKafkaParam("topic"); + + Properties props = new Properties(); + for (String key:kafka010SinkTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka010SinkTableInfo.getKafkaParam(key)); + } + this.properties = props; + this.partitioner = Optional.of(new FlinkFixedPartitioner<>()); + this.fieldNames = kafka010SinkTableInfo.getFields(); + TypeInformation[] types = new TypeInformation[kafka010SinkTableInfo.getFields().length]; + for(int i = 0; i< kafka010SinkTableInfo.getFieldClasses().length; i++){ + types[i] = TypeInformation.of(kafka010SinkTableInfo.getFieldClasses()[i]); + } + this.fieldTypes = types; + + TableSchemaBuilder schemaBuilder = TableSchema.builder(); + for (int i=0;i dataStream) { + KafkaTableSink kafkaTableSink = new Kafka010TableSink( + schema, + topic, + properties, + partitioner, + serializationSchema + ); + + kafkaTableSink.emitDataStream(dataStream); + } + + @Override + public TypeInformation getOutputType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + @Override + public TableSink configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + +} diff --git 
a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java new file mode 100644 index 000000000..c0818f34c --- /dev/null +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +/** + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkParser extends AbsTableParser { + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + KafkaSinkTableInfo kafka10SinkTableInfo = new KafkaSinkTableInfo(); + kafka10SinkTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafka10SinkTableInfo); + kafka10SinkTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSinkTableInfo.PARALLELISM_KEY.toLowerCase()))); + if (props.get(KafkaSinkTableInfo.SINK_DATA_TYPE) != null) { + kafka10SinkTableInfo.setSinkDataType(props.get(KafkaSinkTableInfo.SINK_DATA_TYPE).toString()); + } + if (props.get(KafkaSinkTableInfo.FIELD_DELINITER) != null) { + kafka10SinkTableInfo.setFieldDelimiter(props.get(KafkaSinkTableInfo.FIELD_DELINITER).toString()); + } + + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafka10SinkTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + return kafka10SinkTableInfo; + } +} diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java new file mode 100644 index 000000000..0235547d2 --- /dev/null +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java @@ -0,0 +1,71 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkTableInfo extends TargetTableInfo { + //version + private static final String CURR_TYPE = "kafka10"; + + public KafkaSinkTableInfo(){ + super.setType(CURR_TYPE); + } + + public Map kafkaParam = new HashMap(); + + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } + + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + //Preconditions.checkNotNull(kafkaParam.get("groupId"), "kafka of groupId is required"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } +} diff --git a/kafka10/kafka10-source/pom.xml b/kafka10/kafka10-source/pom.xml index 365f98048..0f0c55c36 100644 --- 
a/kafka10/kafka10-source/pom.xml +++ b/kafka10/kafka10-source/pom.xml @@ -13,17 +13,6 @@ kafka10-source http://maven.apache.org - - - - org.apache.flink - flink-connector-kafka-0.10_2.11 - ${flink.version} - - - - - diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java index 2953d0f86..ef85e5b56 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -16,11 +16,16 @@ * limitations under the License. */ - package com.dtstack.flink.sql.source.kafka; import com.dtstack.flink.sql.source.IStreamSourceGener; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCommonConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCsvConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerJsonConsumer; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; import com.dtstack.flink.sql.source.kafka.table.KafkaSourceTableInfo; import com.dtstack.flink.sql.table.SourceTableInfo; import com.dtstack.flink.sql.util.DtStringUtil; @@ -28,6 +33,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; @@ -38,66 +44,106 @@ import java.util.HashMap; import java.util.Map; 
import java.util.Properties; +import java.util.regex.Pattern; /** * If eventtime field is specified, the default time field rowtime * Date: 2018/09/18 * Company: www.dtstack.com + * * @author sishu.yss */ public class KafkaSource implements IStreamSourceGener

{ - private static final String SOURCE_OPERATOR_NAME_TPL = "${topic}_${table}"; + private static final String SOURCE_OPERATOR_NAME_TPL = "${topic}_${table}"; - /** - * Get kafka data source, you need to provide the data field names, data types - * If you do not specify auto.offset.reset, the default use groupoffset - * @param sourceTableInfo - * @return - */ - @SuppressWarnings("rawtypes") - @Override + /** + * Get kafka data source, you need to provide the data field names, data types + * If you do not specify auto.offset.reset, the default use groupoffset + * + * @param sourceTableInfo + * @return + */ + @SuppressWarnings("rawtypes") + @Override public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) { - KafkaSourceTableInfo kafka010SourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; - String topicName = kafka010SourceTableInfo.getTopic(); - - Properties props = new Properties(); - props.setProperty("bootstrap.servers", kafka010SourceTableInfo.getBootstrapServers()); - props.setProperty("auto.offset.reset", kafka010SourceTableInfo.getOffsetReset()); - //TODO props.setProperty("zookeeper.connect", kafka09SourceTableInfo.) 
- - TypeInformation[] types = new TypeInformation[kafka010SourceTableInfo.getFields().length]; - for(int i = 0; i< kafka010SourceTableInfo.getFieldClasses().length; i++){ - types[i] = TypeInformation.of(kafka010SourceTableInfo.getFieldClasses()[i]); - } - - TypeInformation typeInformation = new RowTypeInfo(types, kafka010SourceTableInfo.getFields()); - FlinkKafkaConsumer010 kafkaSrc = new CustomerKafka010Consumer(topicName, - new CustomerJsonDeserialization(typeInformation), props); - - //earliest,latest - if("earliest".equalsIgnoreCase(kafka010SourceTableInfo.getOffsetReset())){ - kafkaSrc.setStartFromEarliest(); - }else if(DtStringUtil.isJosn(kafka010SourceTableInfo.getOffsetReset())){// {"0":12312,"1":12321,"2":12312} - try { - Properties properties = PluginUtil.jsonStrToObject(kafka010SourceTableInfo.getOffsetReset(), Properties.class); - Map offsetMap = PluginUtil.ObjectToMap(properties); - Map specificStartupOffsets = new HashMap<>(); - for(Map.Entry entry:offsetMap.entrySet()){ - specificStartupOffsets.put(new KafkaTopicPartition(topicName,Integer.valueOf(entry.getKey())),Long.valueOf(entry.getValue().toString())); - } - kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); - } catch (Exception e) { - throw new RuntimeException("not support offsetReset type:" + kafka010SourceTableInfo.getOffsetReset()); - } - }else { - kafkaSrc.setStartFromLatest(); - } - - String fields = StringUtils.join(kafka010SourceTableInfo.getFields(), ","); - String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); - return tableEnv.fromDataStream(env.addSource(kafkaSrc, sourceOperatorName, typeInformation), fields); - } + + KafkaSourceTableInfo kafka010SourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; + String topicName = kafka010SourceTableInfo.getKafkaParam("topic"); + String offsetReset = kafka010SourceTableInfo.getKafkaParam("auto.offset.reset"); + Boolean topicIsPattern = 
kafka010SourceTableInfo.getPatternTopic(); + + Properties props = new Properties(); + for (String key:kafka010SourceTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka010SourceTableInfo.getKafkaParam(key)); + } + + TypeInformation[] types = new TypeInformation[kafka010SourceTableInfo.getFields().length]; + + for(int i = 0; i< kafka010SourceTableInfo.getFieldClasses().length; i++){ + types[i] = TypeInformation.of(kafka010SourceTableInfo.getFieldClasses()[i]); + } + + TypeInformation typeInformation = new RowTypeInfo(types, kafka010SourceTableInfo.getFields()); + + FlinkKafkaConsumer010 kafkaSrc; + + String fields= StringUtils.join(kafka010SourceTableInfo.getFields(), ","); + + + if ("json".equalsIgnoreCase(kafka010SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerJsonConsumer(Pattern.compile(topicName), + new CustomerJsonDeserialization(typeInformation), props); + } else { + kafkaSrc = new CustomerJsonConsumer(topicName, + new CustomerJsonDeserialization(typeInformation), props); + } + } else if ("csv".equalsIgnoreCase(kafka010SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerCsvConsumer(Pattern.compile(topicName), + new CustomerCsvDeserialization(typeInformation, + kafka010SourceTableInfo.getFieldDelimiter(), kafka010SourceTableInfo.getLengthCheckPolicy()), props); + } else { + kafkaSrc = new CustomerCsvConsumer(topicName, + new CustomerCsvDeserialization(typeInformation, + kafka010SourceTableInfo.getFieldDelimiter(), kafka010SourceTableInfo.getLengthCheckPolicy()), props); + } + } else { + if (topicIsPattern) { + kafkaSrc = new CustomerCommonConsumer(Pattern.compile(topicName), new CustomerCommonDeserialization(), props); + } else { + kafkaSrc = new CustomerCommonConsumer(topicName, new CustomerCommonDeserialization(), props); + } + } + + //earliest,latest + if("earliest".equalsIgnoreCase(offsetReset)){ + kafkaSrc.setStartFromEarliest(); + } else if 
(DtStringUtil.isJosn(offsetReset)) {// {"0":12312,"1":12321,"2":12312} + try { + Properties properties = PluginUtil.jsonStrToObject(offsetReset, Properties.class); + Map offsetMap = PluginUtil.ObjectToMap(properties); + Map specificStartupOffsets = new HashMap<>(); + for (Map.Entry entry : offsetMap.entrySet()) { + specificStartupOffsets.put(new KafkaTopicPartition(topicName, Integer.valueOf(entry.getKey())), Long.valueOf(entry.getValue().toString())); + } + kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); + } catch (Exception e) { + throw new RuntimeException("not support offsetReset type:" + offsetReset); + } + }else{ + kafkaSrc.setStartFromLatest(); + } + + String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka010SourceTableInfo.getParallelism(); + if(parallelism != null){ + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); + } } diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java new file mode 100644 index 000000000..bbf91997a --- /dev/null +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java @@ -0,0 +1,77 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCommonConsumer extends FlinkKafkaConsumer010 { + + private CustomerCommonDeserialization customerCommonDeserialization; + + + public CustomerCommonConsumer(String topic, KeyedDeserializationSchema deserializer, Properties props) { + super(topic, deserializer, props); + this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer; + } + + public CustomerCommonConsumer(Pattern subscriptionPattern, KeyedDeserializationSchema deserializer, 
Properties props) { + super(subscriptionPattern, deserializer, props); + this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer; + } + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerCommonDeserialization.setRuntimeContext(getRuntimeContext()); + customerCommonDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceFunction.SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerCommonDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java new file mode 100644 index 000000000..c576a445c --- /dev/null +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Arrays; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ + +public class CustomerCsvConsumer extends FlinkKafkaConsumer010 { + + private static final long serialVersionUID = -2265366268827807739L; + + private CustomerCsvDeserialization customerCsvDeserialization; + + public CustomerCsvConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); + 
this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } + + public CustomerCsvConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } + + + + + @Override + public void run(SourceFunction.SourceContext sourceContext) throws Exception { + customerCsvDeserialization.setRuntimeContext(getRuntimeContext()); + customerCsvDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceFunction.SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerCsvDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka09Consumer.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java similarity index 61% rename from kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka09Consumer.java rename to kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java index 052b4f048..e41337885 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka09Consumer.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java @@ -16,50 +16,61 @@ * 
limitations under the License. */ -package com.dtstack.flink.sql.source.kafka; +package com.dtstack.flink.sql.source.kafka.consumer; import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; import org.apache.flink.metrics.MetricGroup; import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.functions.source.SourceFunction; import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.types.Row; import org.apache.flink.util.SerializedValue; +import java.util.Arrays; import java.util.Map; import java.util.Properties; +import java.util.regex.Pattern; /** * Reason: - * Date: 2018/10/12 + * Date: 2018/10/19 * Company: www.dtstack.com + * * @author xuchao */ -public class CustomerKafka09Consumer extends FlinkKafkaConsumer09 { +public class CustomerJsonConsumer extends FlinkKafkaConsumer010 { - private static final long serialVersionUID = 4451177393982291909L; + private static final long serialVersionUID = -2265366268827807739L; private CustomerJsonDeserialization customerJsonDeserialization; - public CustomerKafka09Consumer(String topic, AbsDeserialization valueDeserializer, Properties props) { - super(topic, valueDeserializer, props); + public CustomerJsonConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); 
this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; } + public CustomerJsonConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + @Override - public void run(SourceContext sourceContext) throws Exception { + public void run(SourceFunction.SourceContext sourceContext) throws Exception { customerJsonDeserialization.setRuntimeContext(getRuntimeContext()); customerJsonDeserialization.initMetric(); super.run(sourceContext); } @Override - protected AbstractFetcher createFetcher(SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + protected AbstractFetcher createFetcher(SourceFunction.SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); customerJsonDeserialization.setFetcher(fetcher); return fetcher; diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java new file mode 100644 index 000000000..9c3b9b03f --- /dev/null +++ 
b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java @@ -0,0 +1,156 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.deserialization; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TypeExtractor; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import 
java.lang.reflect.Field; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.*; +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCommonDeserialization extends AbsDeserialization implements KeyedDeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(CustomerCommonDeserialization.class); + + public static final String[] KAFKA_COLUMNS = new String[]{"_TOPIC", "_MESSAGEKEY", "_MESSAGE", "_PARTITION", "_OFFSET"}; + + private AbstractFetcher fetcher; + + private boolean firstMsg = true; + + @Override + public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) { + if (firstMsg) { + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + firstMsg = false; + } + + numInRecord.inc(); + if(message!=null){numInBytes.inc(message.length);} + if(messageKey!=null){numInBytes.inc(messageKey.length);} + + try { + Row row = Row.of( + topic, //topic + messageKey == null ? 
null : new String(messageKey, UTF_8), //key + new String(message, UTF_8), //message + partition, + offset + ); + return row; + } catch (Throwable t) { + LOG.error(t.getMessage()); + dirtyDataCounter.inc(); + return null; + } + } + + @Override + public Row deserialize(byte[] message) throws IOException { + return null; + } + + public void setFetcher(AbstractFetcher fetcher) { + this.fetcher = fetcher; + } + + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + TypeInformation[] types = new TypeInformation[]{ + TypeExtractor.createTypeInfo(String.class), + TypeExtractor.createTypeInfo(String.class), //createTypeInformation[String] + TypeExtractor.createTypeInfo(String.class), + Types.INT, + Types.LONG + }; + return new RowTypeInfo(types, KAFKA_COLUMNS); + } + + protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if (!hasAssignedPartitions) { + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = 
(SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for (TopicPartition topicPartition : assignedPartitions) { + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } +} diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java new file mode 100644 index 000000000..50a8c5e12 --- /dev/null +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + + +package com.dtstack.flink.sql.source.kafka.deserialization; + + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.*; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ + +public class CustomerCsvDeserialization extends AbsDeserialization { + + private static final Logger LOG = LoggerFactory.getLogger(CustomerCsvDeserialization.class); + + private static final long serialVersionUID = -2706012724306826506L; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** Type information describing the result type. */ + private final TypeInformation typeInfo; + + /** Field names to parse. Indices match fieldTypes indices. */ + private final String[] fieldNames; + + /** Types to parse fields as. Indices match fieldNames indices. */ + private final TypeInformation[] fieldTypes; + + /** Flag indicating whether to fail on a missing field. 
*/ + private boolean failOnMissingField; + + private AbstractFetcher fetcher; + + private boolean firstMsg = true; + + private String fieldDelimiter; + + private String lengthCheckPolicy; + + public CustomerCsvDeserialization(TypeInformation typeInfo, String fieldDelimiter, String lengthCheckPolicy){ + this.typeInfo = typeInfo; + + this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames(); + + this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes(); + + this.fieldDelimiter = fieldDelimiter; + + this.lengthCheckPolicy = lengthCheckPolicy; + } + + @Override + public Row deserialize(byte[] message) throws IOException { + if(firstMsg){ + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + + firstMsg = false; + } + + try { + numInRecord.inc(); + numInBytes.inc(message.length); + String[] fieldsList = null; + if (message != null && message.length > 0){ + fieldsList = new String(message).split(fieldDelimiter); + } + if (fieldsList == null || fieldsList.length != fieldNames.length){//exception condition + if (lengthCheckPolicy.equalsIgnoreCase("SKIP")) { + return null; + }else if (lengthCheckPolicy.equalsIgnoreCase("EXCEPTION")) { + throw new RuntimeException("lengthCheckPolicy Error,message have "+fieldsList.length+" fields,sql have "+fieldNames.length); + } + } + + Row row = new Row(fieldNames.length); + for (int i = 0; i < fieldNames.length; i++) { + if (i fetcher) { + this.fetcher = fetcher; + } + + protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until 
assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if(!hasAssignedPartitions){ + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = (SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for(TopicPartition topicPartition : assignedPartitions){ + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } + + public void setFailOnMissingField(boolean failOnMissingField) { + this.failOnMissingField = failOnMissingField; + } + +} diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java similarity index 98% rename from kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java rename to kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java index ec369b28f..24b607ee1 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java +++ 
b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java @@ -16,9 +16,9 @@ * limitations under the License. */ + - -package com.dtstack.flink.sql.source.kafka; +package com.dtstack.flink.sql.source.kafka.deserialization; import com.dtstack.flink.sql.source.AbsDeserialization; @@ -123,6 +123,7 @@ public Row deserialize(byte[] message) throws IOException { return row; } catch (Throwable t) { //add metric of dirty data + LOG.error(t.getMessage()); dirtyDataCounter.inc(); return null; } diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java index 5cba4e7f2..544771e2d 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -37,15 +37,29 @@ public class KafkaSourceParser extends AbsSourceParser { @Override public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { - KafkaSourceTableInfo kafka10SourceTableInfo = new KafkaSourceTableInfo(); kafka10SourceTableInfo.setName(tableName); parseFieldsInfo(fieldsInfo, kafka10SourceTableInfo); - kafka10SourceTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSourceTableInfo.PARALLELISM_KEY.toLowerCase()))); - kafka10SourceTableInfo.setBootstrapServers(MathUtil.getString(props.get(KafkaSourceTableInfo.BOOTSTRAPSERVERS_KEY.toLowerCase()))); - kafka10SourceTableInfo.setGroupId(MathUtil.getString(props.get(KafkaSourceTableInfo.GROUPID_KEY.toLowerCase()))); - kafka10SourceTableInfo.setTopic(MathUtil.getString(props.get(KafkaSourceTableInfo.TOPIC_KEY.toLowerCase()))); + + kafka10SourceTableInfo.setPatternTopic(MathUtil.getBoolean(props.get(KafkaSourceTableInfo.PATTERNTOPIC_KEY.toLowerCase()))); + + 
kafka10SourceTableInfo.setTimeZone(MathUtil.getString(props.get(KafkaSourceTableInfo.TIME_ZONE_KEY.toLowerCase()))); + + if (props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE) != null) { + kafka10SourceTableInfo.setSourceDataType(props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE).toString()); + } + if (props.get(KafkaSourceTableInfo.FIELD_DELINITER) != null) { + kafka10SourceTableInfo.setFieldDelimiter(props.get(KafkaSourceTableInfo.FIELD_DELINITER).toString()); + } + if (props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY) != null) { + kafka10SourceTableInfo.setLengthCheckPolicy(props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY).toString()); + } + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafka10SourceTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } return kafka10SourceTableInfo; } } diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index fff9cc8b9..e8db0e3df 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -23,6 +23,10 @@ import com.dtstack.flink.sql.table.SourceTableInfo; import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + /** * Reason: * Date: 2018/09/18 @@ -31,90 +35,62 @@ */ public class KafkaSourceTableInfo extends SourceTableInfo { - //version private static final String CURR_TYPE = "kafka10"; - public static final String BOOTSTRAPSERVERS_KEY = "bootstrapServers"; - - public static final String TOPIC_KEY = "topic"; - - public static final String GROUPID_KEY = "groupId"; - - private String bootstrapServers; - - private String topic; - - 
private String groupId; - - //latest, earliest - private String offsetReset = "latest"; - - private String offset; - - public KafkaSourceTableInfo(){ - super.setType(CURR_TYPE); - } + public static final String PATTERNTOPIC_KEY = "patterntopic"; + private Boolean patternTopic=false; - public String getBootstrapServers() { - return bootstrapServers; + public Boolean getPatternTopic() { + return patternTopic; } - public void setBootstrapServers(String bootstrapServers) { - this.bootstrapServers = bootstrapServers; + public void setPatternTopic(Boolean patternTopic) { + if (patternTopic==null){ + return; + } + this.patternTopic = patternTopic; } - public String getTopic() { - return topic; + public KafkaSourceTableInfo(){ + super.setType(CURR_TYPE); } - public void setTopic(String topic) { - this.topic = topic; - } + public Map kafkaParam = new HashMap(); - public String getGroupId() { - return groupId; + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); } - public void setGroupId(String groupId) { - this.groupId = groupId; + public String getKafkaParam(String key){ + return kafkaParam.get(key); } - public String getOffsetReset() { - return offsetReset; + public Boolean getKafkaBooleanParam(String key){ + return Boolean.valueOf(kafkaParam.getOrDefault(key,"false").toLowerCase()); } - public void setOffsetReset(String offsetReset) { - if(offsetReset == null){ - return; - } - - this.offsetReset = offsetReset; + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); } - public String getOffset() { - return offset; - } - public void setOffset(String offset) { - this.offset = offset; - } @Override public boolean check() { - Preconditions.checkNotNull(bootstrapServers, "kafka of bootstrapServers is required"); - Preconditions.checkNotNull(topic, "kafka of topic is required"); - Preconditions.checkNotNull(groupId, "kafka of groupId is required"); - Preconditions.checkState(offsetReset.equalsIgnoreCase("latest") - || 
offsetReset.equalsIgnoreCase("latest"), "kafka of offsetReset set fail"); + + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + String offset = kafkaParam.get("auto.offset.reset"); + Preconditions.checkState(offset.equalsIgnoreCase("latest") + || offset.equalsIgnoreCase("earliest"), "kafka of offsetReset set fail"); return false; } @Override public String getType() { -// return super.getType() + SOURCE_SUFFIX; return super.getType(); } } diff --git a/kafka10/pom.xml b/kafka10/pom.xml index 9c8c78bba..2f85f5c3b 100644 --- a/kafka10/pom.xml +++ b/kafka10/pom.xml @@ -14,9 +14,16 @@ kafka10-source + kafka10-sink + + org.apache.flink + flink-connector-kafka-0.10_2.11 + ${flink.version} + + junit junit diff --git a/kafka11/kafka11-sink/pom.xml b/kafka11/kafka11-sink/pom.xml new file mode 100644 index 000000000..4c748c90b --- /dev/null +++ b/kafka11/kafka11-sink/pom.xml @@ -0,0 +1,89 @@ + + + + sql.kafka11 + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.kafka11 + 1.0-SNAPSHOT + kafka11-sink + jar + + + + + org.apache.flink + flink-json + ${flink.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java new file mode 100644 index 000000000..5184eba4f --- /dev/null +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerCsvSerialization.java @@ -0,0 +1,145 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.sink.kafka; + +import org.apache.flink.annotation.Internal; +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.table.shaded.org.apache.commons.lang.StringEscapeUtils; +import org.apache.flink.types.Row; +import org.apache.flink.types.StringValue; + +import java.io.IOException; + +import static org.apache.flink.api.java.typeutils.runtime.NullMaskUtils.writeNullMask; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +@Internal +public final class CustomerCsvSerialization extends TypeSerializerSingleton { + + private static final long serialVersionUID = 1L; + + private String fieldDelimiter = "\u0001"; + private TypeInformation[] fieldTypes; + private TypeSerializer[] fieldSerializers; + private static final Row EMPTY = null; + + public CustomerCsvSerialization(String fielddelimiter,TypeInformation[] 
fieldTypes) { + this.fieldDelimiter = fielddelimiter; + this.fieldTypes = fieldTypes; + this.fieldSerializers = (TypeSerializer[])createSerializer(new ExecutionConfig()); + } + + public TypeSerializer[] createSerializer(ExecutionConfig config) { + int len = fieldTypes.length; + TypeSerializer[] fieldSerializers = new TypeSerializer[len]; + for (int i = 0; i < len; i++) { + fieldSerializers[i] = fieldTypes[i].createSerializer(config); + } + return fieldSerializers; + } + + @Override + public boolean isImmutableType() { + return true; + } + + @Override + public Row createInstance() { + return EMPTY; + } + + @Override + public Row copy(Row from) { + return null; + } + + @Override + public Row copy(Row from, Row reuse) { + return null; + } + + @Override + public int getLength() { + return -1; + } + + @Override + public void serialize(Row record, DataOutputView target) throws IOException { + int len = fieldSerializers.length; + + if (record.getArity() != len) { + throw new RuntimeException("Row arity of from does not match serializers."); + } + + // write a null mask + writeNullMask(len, record, target); + + // serialize non-null fields + StringBuffer stringBuffer = new StringBuffer(); + for (int i = 0; i < len; i++) { + Object o = record.getField(i); + if (o != null) { + //fieldSerializers[i].serialize(o, target); + stringBuffer.append(o); + } + if(i != len-1){ + stringBuffer.append(StringEscapeUtils.unescapeJava(fieldDelimiter)); + //fieldSerializers[i].serialize(fieldDelimiter, target); + } + } + StringValue.writeString(stringBuffer.toString(), target); + } + + @Override + public Row deserialize(DataInputView source) throws IOException { + return null; + } + + @Override + public Row deserialize(Row reuse, DataInputView source) throws IOException { + return null; + } + + @Override + public void copy(DataInputView source, DataOutputView target) throws IOException { + StringValue.copyString(source, target); + } + + @Override + public boolean canEqual(Object obj) { + 
return obj instanceof CustomerCsvSerialization; + } + + @Override + protected boolean isCompatibleSerializationFormatIdentifier(String identifier) { + return super.isCompatibleSerializationFormatIdentifier(identifier) + || identifier.equals(StringValue.class.getCanonicalName()); + } +} diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java new file mode 100644 index 000000000..44383ec9b --- /dev/null +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.kafka.table.KafkaSinkTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.serialization.TypeInformationSerializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.formats.json.JsonRowSerializationSchema; +import org.apache.flink.streaming.api.datastream.DataStream; + +import org.apache.flink.streaming.connectors.kafka.Kafka011TableSink; +import org.apache.flink.streaming.connectors.kafka.KafkaTableSink; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.api.TableSchemaBuilder; +import org.apache.flink.table.sinks.AppendStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +import java.util.Optional; +import java.util.Properties; + +/** + * kafka result table + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSink implements AppendStreamTableSink, IStreamSinkGener { + + protected String[] fieldNames; + + protected TypeInformation[] fieldTypes; + + /** The schema of the table. */ + private TableSchema schema; + + /** The Kafka topic to write to. */ + protected String topic; + + /** Properties for the Kafka producer. */ + protected Properties properties; + + /** Serialization schema for encoding records to Kafka. */ + protected SerializationSchema serializationSchema; + + /** Partitioner to select Kafka partition for each item. 
*/ + protected Optional> partitioner; + + @Override + public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { + KafkaSinkTableInfo kafka011SinkTableInfo = (KafkaSinkTableInfo) targetTableInfo; + this.topic = kafka011SinkTableInfo.getKafkaParam("topic"); + + Properties props = new Properties(); + for (String key:kafka011SinkTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka011SinkTableInfo.getKafkaParam(key)); + } + this.properties = props; + this.partitioner = Optional.of(new FlinkFixedPartitioner<>()); + this.fieldNames = kafka011SinkTableInfo.getFields(); + TypeInformation[] types = new TypeInformation[kafka011SinkTableInfo.getFields().length]; + for(int i = 0; i< kafka011SinkTableInfo.getFieldClasses().length; i++){ + types[i] = TypeInformation.of(kafka011SinkTableInfo.getFieldClasses()[i]); + } + this.fieldTypes = types; + + TableSchemaBuilder schemaBuilder = TableSchema.builder(); + for (int i=0;i dataStream) { + KafkaTableSink kafkaTableSink = new Kafka011TableSink( + schema, + topic, + properties, + partitioner, + serializationSchema + ); + + kafkaTableSink.emitDataStream(dataStream); + } + + @Override + public TypeInformation getOutputType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + @Override + public TableSink configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + +} diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java new file mode 100644 index 000000000..9bf83dccc --- /dev/null +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -0,0 +1,56 @@ +/* + * Licensed to the 
Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkParser extends AbsTableParser { + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + KafkaSinkTableInfo kafka11SinkTableInfo = new KafkaSinkTableInfo(); + kafka11SinkTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafka11SinkTableInfo); + kafka11SinkTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSinkTableInfo.PARALLELISM_KEY.toLowerCase()))); + if (props.get(KafkaSinkTableInfo.SINK_DATA_TYPE) != null) { + kafka11SinkTableInfo.setSinkDataType(props.get(KafkaSinkTableInfo.SINK_DATA_TYPE).toString()); + } + if (props.get(KafkaSinkTableInfo.FIELD_DELINITER) != null) { + kafka11SinkTableInfo.setFieldDelimiter(props.get(KafkaSinkTableInfo.FIELD_DELINITER).toString()); + } + + for (String key:props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + 
kafka11SinkTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + return kafka11SinkTableInfo; + } +} diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java new file mode 100644 index 000000000..78edf17b4 --- /dev/null +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class KafkaSinkTableInfo extends TargetTableInfo { + //version + private static final String CURR_TYPE = "kafka11"; + + public KafkaSinkTableInfo(){ + super.setType(CURR_TYPE); + } + + public Map kafkaParam = new HashMap(); + + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } + + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + //Preconditions.checkNotNull(kafkaParam.get("groupId"), "kafka of groupId is required"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } +} diff --git a/kafka11/kafka11-source/pom.xml b/kafka11/kafka11-source/pom.xml index 98182aa64..ae6997f7e 100644 --- a/kafka11/kafka11-source/pom.xml +++ b/kafka11/kafka11-source/pom.xml @@ -13,15 +13,6 @@ kafka11-source http://maven.apache.org - - - - org.apache.flink - flink-connector-kafka-0.11_2.11 - ${flink.version} - - - diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java index d10151920..0f538aa1d 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java +++ 
b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -16,11 +16,16 @@ * limitations under the License. */ - package com.dtstack.flink.sql.source.kafka; import com.dtstack.flink.sql.source.IStreamSourceGener; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCommonConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerCsvConsumer; +import com.dtstack.flink.sql.source.kafka.consumer.CustomerJsonConsumer; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; import com.dtstack.flink.sql.source.kafka.table.KafkaSourceTableInfo; import com.dtstack.flink.sql.table.SourceTableInfo; import com.dtstack.flink.sql.util.DtStringUtil; @@ -28,7 +33,9 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.source.SocketTextStreamFunction; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; import org.apache.flink.table.api.Table; @@ -38,66 +45,101 @@ import java.util.HashMap; import java.util.Map; import java.util.Properties; +import java.util.regex.Pattern; /** * If eventtime field is specified, the default time field rowtime * Date: 2018/09/18 * Company: www.dtstack.com + * * @author sishu.yss */ public class KafkaSource implements IStreamSourceGener

{ - private static final String SOURCE_OPERATOR_NAME_TPL = "${topic}_${table}"; + private static final String SOURCE_OPERATOR_NAME_TPL = "${topic}_${table}"; - /** - * Get kafka data source, you need to provide the data field names, data types - * If you do not specify auto.offset.reset, the default use groupoffset - * @param sourceTableInfo - * @return - */ - @SuppressWarnings("rawtypes") - @Override + /** + * Get kafka data source, you need to provide the data field names, data types + * If you do not specify auto.offset.reset, the default use groupoffset + * + * @param sourceTableInfo + * @return + */ + @SuppressWarnings("rawtypes") + @Override public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) { - KafkaSourceTableInfo kafka011SourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; - String topicName = kafka011SourceTableInfo.getTopic(); + KafkaSourceTableInfo kafka011SourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; + String topicName = kafka011SourceTableInfo.getKafkaParam("topic"); + String offsetReset = kafka011SourceTableInfo.getKafkaParam("auto.offset.reset"); + Boolean topicIsPattern = kafka011SourceTableInfo.getPatternTopic(); + + Properties props = new Properties(); + for (String key : kafka011SourceTableInfo.getKafkaParamKeys()) { + props.setProperty(key, kafka011SourceTableInfo.getKafkaParam(key)); + } - Properties props = new Properties(); - props.setProperty("bootstrap.servers", kafka011SourceTableInfo.getBootstrapServers()); - props.setProperty("auto.offset.reset", kafka011SourceTableInfo.getOffsetReset()); - //TODO props.setProperty("zookeeper.connect", kafka09SourceTableInfo.) 
+ TypeInformation[] types = new TypeInformation[kafka011SourceTableInfo.getFields().length]; + for (int i = 0; i < kafka011SourceTableInfo.getFieldClasses().length; i++) { + types[i] = TypeInformation.of(kafka011SourceTableInfo.getFieldClasses()[i]); + } - TypeInformation[] types = new TypeInformation[kafka011SourceTableInfo.getFields().length]; - for(int i = 0; i< kafka011SourceTableInfo.getFieldClasses().length; i++){ - types[i] = TypeInformation.of(kafka011SourceTableInfo.getFieldClasses()[i]); - } + TypeInformation typeInformation = new RowTypeInfo(types, kafka011SourceTableInfo.getFields()); - TypeInformation typeInformation = new RowTypeInfo(types, kafka011SourceTableInfo.getFields()); - FlinkKafkaConsumer011 kafkaSrc = new CustomerKafka011Consumer(topicName, - new CustomerJsonDeserialization(typeInformation), props); + FlinkKafkaConsumer011 kafkaSrc; + String fields = StringUtils.join(kafka011SourceTableInfo.getFields(), ","); - //earliest,latest - if("earliest".equalsIgnoreCase(kafka011SourceTableInfo.getOffsetReset())){ - kafkaSrc.setStartFromEarliest(); - }else if(DtStringUtil.isJosn(kafka011SourceTableInfo.getOffsetReset())){// {"0":12312,"1":12321,"2":12312} - try { - Properties properties = PluginUtil.jsonStrToObject(kafka011SourceTableInfo.getOffsetReset(), Properties.class); - Map offsetMap = PluginUtil.ObjectToMap(properties); - Map specificStartupOffsets = new HashMap<>(); - for(Map.Entry entry:offsetMap.entrySet()){ - specificStartupOffsets.put(new KafkaTopicPartition(topicName,Integer.valueOf(entry.getKey())),Long.valueOf(entry.getValue().toString())); - } - kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); - } catch (Exception e) { - throw new RuntimeException("not support offsetReset type:" + kafka011SourceTableInfo.getOffsetReset()); - } - }else{ - kafkaSrc.setStartFromLatest(); - } + if ("json".equalsIgnoreCase(kafka011SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new 
CustomerJsonConsumer(Pattern.compile(topicName), + new CustomerJsonDeserialization(typeInformation), props); + } else { + kafkaSrc = new CustomerJsonConsumer(topicName, + new CustomerJsonDeserialization(typeInformation), props); + } + } else if ("csv".equalsIgnoreCase(kafka011SourceTableInfo.getSourceDataType())) { + if (topicIsPattern) { + kafkaSrc = new CustomerCsvConsumer(Pattern.compile(topicName), + new CustomerCsvDeserialization(typeInformation, + kafka011SourceTableInfo.getFieldDelimiter(), kafka011SourceTableInfo.getLengthCheckPolicy()), props); + } else { + kafkaSrc = new CustomerCsvConsumer(topicName, + new CustomerCsvDeserialization(typeInformation, + kafka011SourceTableInfo.getFieldDelimiter(), kafka011SourceTableInfo.getLengthCheckPolicy()), props); + } + } else { + if (topicIsPattern) { + kafkaSrc = new CustomerCommonConsumer(Pattern.compile(topicName), new CustomerCommonDeserialization(), props); + } else { + kafkaSrc = new CustomerCommonConsumer(topicName, new CustomerCommonDeserialization(), props); + } + } - String fields = StringUtils.join(kafka011SourceTableInfo.getFields(), ","); - String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); - return tableEnv.fromDataStream(env.addSource(kafkaSrc, sourceOperatorName, typeInformation), fields); - } + //earliest,latest + if ("earliest".equalsIgnoreCase(offsetReset)) { + kafkaSrc.setStartFromEarliest(); + } else if (DtStringUtil.isJosn(offsetReset)) {// {"0":12312,"1":12321,"2":12312} + try { + Properties properties = PluginUtil.jsonStrToObject(offsetReset, Properties.class); + Map offsetMap = PluginUtil.ObjectToMap(properties); + Map specificStartupOffsets = new HashMap<>(); + for (Map.Entry entry : offsetMap.entrySet()) { + specificStartupOffsets.put(new KafkaTopicPartition(topicName, Integer.valueOf(entry.getKey())), Long.valueOf(entry.getValue().toString())); + } + 
kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); + } catch (Exception e) { + throw new RuntimeException("not support offsetReset type:" + offsetReset); + } + } else { + kafkaSrc.setStartFromLatest(); + } + String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka011SourceTableInfo.getParallelism(); + if (parallelism != null) { + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); + } } diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java new file mode 100644 index 000000000..e46c2dc75 --- /dev/null +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCommonConsumer.java @@ -0,0 +1,75 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.source.kafka.consumer; + +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCommonDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCommonConsumer extends FlinkKafkaConsumer011 { + + private CustomerCommonDeserialization customerCommonDeserialization; + + + public CustomerCommonConsumer(String topic, KeyedDeserializationSchema deserializer, Properties props) { + super(topic, deserializer, props); + this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer; + } + + public CustomerCommonConsumer(Pattern subscriptionPattern, KeyedDeserializationSchema deserializer, Properties props) { + super(subscriptionPattern, deserializer, props); + 
this.customerCommonDeserialization= (CustomerCommonDeserialization) deserializer; + } + + + @Override + public void run(SourceContext sourceContext) throws Exception { + customerCommonDeserialization.setRuntimeContext(getRuntimeContext()); + customerCommonDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerCommonDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka010Consumer.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java similarity index 64% rename from kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka010Consumer.java rename to kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java index 37c1d6ab6..66a83bdd1 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka010Consumer.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerCsvConsumer.java @@ -16,53 +16,68 @@ * limitations under the License. 
*/ -package com.dtstack.flink.sql.source.kafka; +package com.dtstack.flink.sql.source.kafka.consumer; import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerCsvDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; import org.apache.flink.metrics.MetricGroup; import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; -import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; import org.apache.flink.types.Row; import org.apache.flink.util.SerializedValue; +import java.util.Arrays; import java.util.Map; import java.util.Properties; +import java.util.regex.Pattern; /** - * Reason: - * Date: 2018/10/19 + * + * Date: 2018/12/18 * Company: www.dtstack.com - * @author xuchao + * @author DocLi + * + * @modifyer maqi + * */ -public class CustomerKafka010Consumer extends FlinkKafkaConsumer010 { +public class CustomerCsvConsumer extends FlinkKafkaConsumer011 { - private static final long serialVersionUID = 4873757508981691375L; + private static final long serialVersionUID = -2265366268827807739L; - private CustomerJsonDeserialization customerJsonDeserialization; + private CustomerCsvDeserialization customerCsvDeserialization; + + public CustomerCsvConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, 
props); + this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; + } - public CustomerKafka010Consumer(String topic, AbsDeserialization valueDeserializer, Properties props) { - super(topic, valueDeserializer, props); - this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + public CustomerCsvConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerCsvDeserialization = (CustomerCsvDeserialization) valueDeserializer; } + + + @Override public void run(SourceContext sourceContext) throws Exception { - customerJsonDeserialization.setRuntimeContext(getRuntimeContext()); - customerJsonDeserialization.initMetric(); + customerCsvDeserialization.setRuntimeContext(getRuntimeContext()); + customerCsvDeserialization.initMetric(); super.run(sourceContext); } @Override protected AbstractFetcher createFetcher(SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); - customerJsonDeserialization.setFetcher(fetcher); + customerCsvDeserialization.setFetcher(fetcher); return fetcher; } - } diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka011Consumer.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java similarity index 78% rename from kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka011Consumer.java rename to 
kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java index 743e7f6d4..568ef54c0 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafka011Consumer.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/consumer/CustomerJsonConsumer.java @@ -16,24 +16,27 @@ * limitations under the License. */ -package com.dtstack.flink.sql.source.kafka; +package com.dtstack.flink.sql.source.kafka.consumer; import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.deserialization.CustomerJsonDeserialization; import org.apache.flink.metrics.MetricGroup; import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; -import org.apache.flink.streaming.api.functions.source.SourceFunction; import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; import org.apache.flink.types.Row; import org.apache.flink.util.SerializedValue; +import java.util.Arrays; import java.util.Map; import java.util.Properties; +import java.util.regex.Pattern; /** * Reason: @@ -43,17 +46,23 @@ * @author xuchao */ -public class CustomerKafka011Consumer extends FlinkKafkaConsumer011 { +public class CustomerJsonConsumer extends FlinkKafkaConsumer011 { private static final long serialVersionUID = -2265366268827807739L; private CustomerJsonDeserialization customerJsonDeserialization; - public CustomerKafka011Consumer(String topic, AbsDeserialization 
valueDeserializer, Properties props) { - super(topic, valueDeserializer, props); + public CustomerJsonConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; } + public CustomerJsonConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + super(subscriptionPattern, valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + @Override public void run(SourceContext sourceContext) throws Exception { customerJsonDeserialization.setRuntimeContext(getRuntimeContext()); diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java new file mode 100644 index 000000000..a032e6a82 --- /dev/null +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCommonDeserialization.java @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.deserialization; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TypeExtractor; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.util.serialization.KeyedDeserializationSchema; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.DT_PARTITION_GROUP; +import static com.dtstack.flink.sql.metric.MetricConstant.DT_TOPIC_GROUP; +import static com.dtstack.flink.sql.metric.MetricConstant.DT_TOPIC_PARTITION_LAG_GAUGE; +import static java.nio.charset.StandardCharsets.UTF_8; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCommonDeserialization extends AbsDeserialization implements KeyedDeserializationSchema { + private static final Logger LOG = LoggerFactory.getLogger(CustomerCommonDeserialization.class); + + public static final String[] KAFKA_COLUMNS = new String[]{"_TOPIC", "_MESSAGEKEY", "_MESSAGE", "_PARTITION", "_OFFSET"}; + + private AbstractFetcher fetcher; + + private boolean 
firstMsg = true; + + @Override + public Row deserialize(byte[] messageKey, byte[] message, String topic, int partition, long offset) { + if (firstMsg) { + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + firstMsg = false; + } + + numInRecord.inc(); + if(message!=null){numInBytes.inc(message.length);} + if(messageKey!=null){numInBytes.inc(messageKey.length);} + + try { + Row row = Row.of( + topic, //topic + messageKey == null ? null : new String(messageKey, UTF_8), //key + new String(message, UTF_8), //message + partition, + offset + ); + return row; + } catch (Throwable t) { + LOG.error(t.getMessage()); + dirtyDataCounter.inc(); + return null; + } + } + + @Override + public Row deserialize(byte[] message) throws IOException { + return null; + } + + public void setFetcher(AbstractFetcher fetcher) { + this.fetcher = fetcher; + } + + + @Override + public boolean isEndOfStream(Row nextElement) { + return false; + } + + @Override + public TypeInformation getProducedType() { + TypeInformation[] types = new TypeInformation[]{ + TypeExtractor.createTypeInfo(String.class), + TypeExtractor.createTypeInfo(String.class), //createTypeInformation[String] + TypeExtractor.createTypeInfo(String.class), + Types.INT, + Types.LONG + }; + return new RowTypeInfo(types, KAFKA_COLUMNS); + } + + protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if 
(!hasAssignedPartitions) { + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = (SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for (TopicPartition topicPartition : assignedPartitions) { + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } +} diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java new file mode 100644 index 000000000..cd0eb6be7 --- /dev/null +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + + +package com.dtstack.flink.sql.source.kafka.deserialization; + + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.DT_PARTITION_GROUP; +import static com.dtstack.flink.sql.metric.MetricConstant.DT_TOPIC_GROUP; +import static com.dtstack.flink.sql.metric.MetricConstant.DT_TOPIC_PARTITION_LAG_GAUGE; + +/** + * + * Date: 2018/12/18 + * Company: www.dtstack.com + * @author DocLi + * + * @modifyer maqi + * + */ +public class CustomerCsvDeserialization extends AbsDeserialization { + + private static final Logger LOG = LoggerFactory.getLogger(CustomerCsvDeserialization.class); + + private static 
final long serialVersionUID = -2706012724306826506L; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** Type information describing the result type. */ + private final TypeInformation typeInfo; + + /** Field names to parse. Indices match fieldTypes indices. */ + private final String[] fieldNames; + + /** Types to parse fields as. Indices match fieldNames indices. */ + private final TypeInformation[] fieldTypes; + + /** Flag indicating whether to fail on a missing field. */ + private boolean failOnMissingField; + + private AbstractFetcher fetcher; + + private boolean firstMsg = true; + + private String fieldDelimiter; + + private String lengthCheckPolicy; + + public CustomerCsvDeserialization(TypeInformation typeInfo, String fieldDelimiter, String lengthCheckPolicy){ + this.typeInfo = typeInfo; + + this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames(); + + this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes(); + + this.fieldDelimiter = fieldDelimiter; + + this.lengthCheckPolicy = lengthCheckPolicy; + } + + @Override + public Row deserialize(byte[] message) throws IOException { + if(firstMsg){ + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + + firstMsg = false; + } + + try { + numInRecord.inc(); + numInBytes.inc(message.length); + String[] fieldsList = null; + if (message != null && message.length > 0){ + fieldsList = new String(message).split(fieldDelimiter); + } + if (fieldsList == null || fieldsList.length != fieldNames.length){//exception condition + if (lengthCheckPolicy.equalsIgnoreCase("SKIP")) { + return null; + }else if (lengthCheckPolicy.equalsIgnoreCase("EXCEPTION")) { + throw new RuntimeException("lengthCheckPolicy Error,message have "+fieldsList.length+" fields,sql have "+fieldNames.length); + } + } + + Row row = new Row(fieldNames.length); + for (int i = 0; i < fieldNames.length; i++) { + if (i fetcher) { + this.fetcher = fetcher; + } + + 
protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if(!hasAssignedPartitions){ + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = (SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for(TopicPartition topicPartition : assignedPartitions){ + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } + + public void setFailOnMissingField(boolean failOnMissingField) { + this.failOnMissingField = failOnMissingField; + } + +} diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java 
similarity index 98% rename from kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java rename to kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java index 78f1c17ff..e72fd0303 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java @@ -18,7 +18,7 @@ -package com.dtstack.flink.sql.source.kafka; +package com.dtstack.flink.sql.source.kafka.deserialization; import com.dtstack.flink.sql.source.AbsDeserialization; @@ -125,6 +125,7 @@ public Row deserialize(byte[] message) throws IOException { return row; } catch (Throwable t) { //add metric of dirty data + LOG.error(t.getMessage()); dirtyDataCounter.inc(); return null; } diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java index fff3cb9c9..f2297f9c7 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -16,7 +16,6 @@ * limitations under the License. 
*/ - package com.dtstack.flink.sql.source.kafka.table; @@ -30,22 +29,38 @@ * Reason: * Date: 2018/09/18 * Company: www.dtstack.com + * * @author sishu.yss */ public class KafkaSourceParser extends AbsSourceParser { - @Override - public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { - KafkaSourceTableInfo kafka11SourceTableInfo = new KafkaSourceTableInfo(); - kafka11SourceTableInfo.setName(tableName); - parseFieldsInfo(fieldsInfo, kafka11SourceTableInfo); + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + + KafkaSourceTableInfo kafka11SourceTableInfo = new KafkaSourceTableInfo(); + kafka11SourceTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafka11SourceTableInfo); + kafka11SourceTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSourceTableInfo.PARALLELISM_KEY.toLowerCase()))); + kafka11SourceTableInfo.setPatternTopic(MathUtil.getBoolean(props.get(KafkaSourceTableInfo.PATTERNTOPIC_KEY.toLowerCase()))); + + kafka11SourceTableInfo.setTimeZone(MathUtil.getString(props.get(KafkaSourceTableInfo.TIME_ZONE_KEY.toLowerCase()))); - kafka11SourceTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSourceTableInfo.PARALLELISM_KEY.toLowerCase()))); - kafka11SourceTableInfo.setBootstrapServers(MathUtil.getString(props.get(KafkaSourceTableInfo.BOOTSTRAPSERVERS_KEY.toLowerCase()))); - kafka11SourceTableInfo.setGroupId(MathUtil.getString(props.get(KafkaSourceTableInfo.GROUPID_KEY.toLowerCase()))); - kafka11SourceTableInfo.setTopic(MathUtil.getString(props.get(KafkaSourceTableInfo.TOPIC_KEY.toLowerCase()))); - return kafka11SourceTableInfo; - } + if (props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE) != null) { + kafka11SourceTableInfo.setSourceDataType(props.get(KafkaSourceTableInfo.SOURCE_DATA_TYPE).toString()); + } + if (props.get(KafkaSourceTableInfo.FIELD_DELINITER) != null) { + 
kafka11SourceTableInfo.setFieldDelimiter(props.get(KafkaSourceTableInfo.FIELD_DELINITER).toString()); + } + if (props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY) != null) { + kafka11SourceTableInfo.setLengthCheckPolicy(props.get(KafkaSourceTableInfo.LENGTH_CHECK_POLICY).toString()); + } + for (String key : props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafka11SourceTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + return kafka11SourceTableInfo; + } } diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index 4e7397970..983a1882a 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -16,105 +16,72 @@ * limitations under the License. 
*/ - package com.dtstack.flink.sql.source.kafka.table; import com.dtstack.flink.sql.table.SourceTableInfo; import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; /** * Reason: * Date: 2018/09/18 * Company: www.dtstack.com + * * @author sishu.yss */ public class KafkaSourceTableInfo extends SourceTableInfo { - - //version - private static final String CURR_TYPE = "kafka11"; - - public static final String BOOTSTRAPSERVERS_KEY = "bootstrapServers"; - - public static final String TOPIC_KEY = "topic"; - - public static final String GROUPID_KEY = "groupId"; - - private String bootstrapServers; - - private String topic; - - private String groupId; - - //latest, earliest - private String offsetReset = "latest"; - - private String offset; - - public KafkaSourceTableInfo(){ - super.setType(CURR_TYPE); - } - - - public String getBootstrapServers() { - return bootstrapServers; - } - - public void setBootstrapServers(String bootstrapServers) { - this.bootstrapServers = bootstrapServers; - } - - public String getTopic() { - return topic; - } - - public void setTopic(String topic) { - this.topic = topic; - } - - public String getGroupId() { - return groupId; - } - - public void setGroupId(String groupId) { - this.groupId = groupId; - } - - public String getOffsetReset() { - return offsetReset; - } - - public void setOffsetReset(String offsetReset) { - if(offsetReset == null){ - return; - } - - this.offsetReset = offsetReset; - } - - public String getOffset() { - return offset; - } - - public void setOffset(String offset) { - this.offset = offset; - } - - @Override - public boolean check() { - Preconditions.checkNotNull(bootstrapServers, "kafka of bootstrapServers is required"); - Preconditions.checkNotNull(topic, "kafka of topic is required"); - Preconditions.checkNotNull(groupId, "kafka of groupId is required"); - Preconditions.checkState(offsetReset.equalsIgnoreCase("latest") - || 
offsetReset.equalsIgnoreCase("latest"), "kafka of offsetReset set fail"); - - return false; - } - - @Override - public String getType() { -// return super.getType() + SOURCE_SUFFIX; - return super.getType(); - } + //version + private static final String CURR_TYPE = "kafka11"; + + public static final String PATTERNTOPIC_KEY = "patterntopic"; + + private Boolean patternTopic = false; + + public Boolean getPatternTopic() { + return patternTopic; + } + + public void setPatternTopic(Boolean patternTopic) { + if (patternTopic == null) { + return; + } + this.patternTopic = patternTopic; + } + + public KafkaSourceTableInfo() { + super.setType(CURR_TYPE); + } + + public Map kafkaParam = new HashMap<>(); + + public void addKafkaParam(String key, String value) { + kafkaParam.put(key, value); + } + + public String getKafkaParam(String key) { + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys() { + return kafkaParam.keySet(); + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kafkaParam.get("bootstrap.servers"), "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(kafkaParam.get("topic"), "kafka of topic is required"); + String offset = kafkaParam.get("auto.offset.reset"); + Preconditions.checkState(offset.equalsIgnoreCase("latest") + || offset.equalsIgnoreCase("earliest"), "kafka of offsetReset set fail"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } } diff --git a/kafka11/pom.xml b/kafka11/pom.xml index 841e20be9..f1f57aa03 100644 --- a/kafka11/pom.xml +++ b/kafka11/pom.xml @@ -14,9 +14,17 @@ kafka11-source + kafka11-sink + + org.apache.flink + flink-connector-kafka-0.11_2.11 + ${flink.version} + + + junit junit @@ -33,4 +41,5 @@ + \ No newline at end of file diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/ClusterClientFactory.java b/launcher/src/main/java/com/dtstack/flink/sql/launcher/ClusterClientFactory.java index 9dfe2e07e..f02a81819 100644 --- 
a/launcher/src/main/java/com/dtstack/flink/sql/launcher/ClusterClientFactory.java +++ b/launcher/src/main/java/com/dtstack/flink/sql/launcher/ClusterClientFactory.java @@ -18,12 +18,9 @@ package com.dtstack.flink.sql.launcher; -import com.dtstack.flink.sql.ClusterMode; import org.apache.commons.lang.StringUtils; -import org.apache.flink.client.deployment.StandaloneClusterDescriptor; -import org.apache.flink.client.deployment.StandaloneClusterId; import org.apache.flink.client.program.ClusterClient; -import org.apache.flink.client.program.rest.RestClusterClient; +import org.apache.flink.client.program.StandaloneClusterClient; import org.apache.flink.configuration.ConfigConstants; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.GlobalConfiguration; @@ -38,18 +35,14 @@ import org.apache.hadoop.yarn.api.records.YarnApplicationState; import org.apache.hadoop.yarn.client.api.YarnClient; import org.apache.hadoop.yarn.conf.YarnConfiguration; - import java.net.InetSocketAddress; -import java.util.EnumSet; -import java.util.HashSet; -import java.util.List; -import java.util.Set; +import java.util.*; + +import com.dtstack.flink.sql.ClusterMode; +import com.dtstack.flink.sql.options.LauncherOptions; /** - * The Factory of ClusterClient - * - * Company: www.dtstack.com - * @author huyifan.zju@163.com + * @author sishu.yss */ public class ClusterClientFactory { @@ -60,17 +53,13 @@ public static ClusterClient createClusterClient(LauncherOptions launcherOptions) } else if(mode.equals(ClusterMode.yarn.name())) { return createYarnClient(launcherOptions); } - throw new IllegalArgumentException("Unsupported cluster client type: "); } public static ClusterClient createStandaloneClient(LauncherOptions launcherOptions) throws Exception { String flinkConfDir = launcherOptions.getFlinkconf(); Configuration config = GlobalConfiguration.loadConfiguration(flinkConfDir); - - StandaloneClusterDescriptor standaloneClusterDescriptor = new 
StandaloneClusterDescriptor(config); - RestClusterClient clusterClient = standaloneClusterDescriptor.retrieve(StandaloneClusterId.getInstance()); - + StandaloneClusterClient clusterClient = new StandaloneClusterClient(config); LeaderConnectionInfo connectionInfo = clusterClient.getClusterConnectionInfo(); InetSocketAddress address = AkkaUtils.getInetSocketAddressFromAkkaURL(connectionInfo.getAddress()); config.setString(JobManagerOptions.ADDRESS, address.getAddress().getHostName()); @@ -140,4 +129,4 @@ public static ClusterClient createYarnClient(LauncherOptions launcherOptions) { -} +} \ No newline at end of file diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java b/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java index 10bc09475..b6a8c5c64 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java +++ b/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java @@ -23,8 +23,10 @@ import avro.shaded.com.google.common.collect.Lists; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.TypeReference; +import com.dtstack.flink.sql.ClusterMode; import com.dtstack.flink.sql.Main; import com.dtstack.flink.sql.launcher.perjob.PerJobSubmitter; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.client.program.ClusterClient; import org.apache.flink.client.program.PackagedProgram; @@ -32,15 +34,14 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; - -import com.dtstack.flink.sql.ClusterMode; import org.apache.flink.client.program.PackagedProgramUtils; import org.apache.flink.configuration.Configuration; import org.apache.flink.configuration.GlobalConfiguration; import org.apache.flink.runtime.jobgraph.JobGraph; -import org.apache.flink.table.shaded.org.apache.commons.lang.StringUtils; import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings; import org.apache.flink.table.shaded.org.apache.commons.lang.BooleanUtils; 
+import com.dtstack.flink.sql.options.*; + /** * Date: 2017/2/20 @@ -59,39 +60,35 @@ private static String getLocalCoreJarPath(String localSqlRootJar){ } public static void main(String[] args) throws Exception { - if (args.length==1 && args[0].endsWith(".json")){ + if (args.length == 1 && args[0].endsWith(".json")){ args = parseJson(args); } LauncherOptionParser optionParser = new LauncherOptionParser(args); LauncherOptions launcherOptions = optionParser.getLauncherOptions(); String mode = launcherOptions.getMode(); List argList = optionParser.getProgramExeArgList(); + String[] localArgs = argList.toArray(new String[argList.size()]); if(mode.equals(ClusterMode.local.name())) { - String[] localArgs = argList.toArray(new String[argList.size()]); Main.main(localArgs); - return; - } - - String pluginRoot = launcherOptions.getLocalSqlPluginPath(); - File jarFile = new File(getLocalCoreJarPath(pluginRoot)); - String[] remoteArgs = argList.toArray(new String[argList.size()]); - PackagedProgram program = new PackagedProgram(jarFile, Lists.newArrayList(), remoteArgs); - - if(StringUtils.isNotBlank(launcherOptions.getSavePointPath())){ - program.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(launcherOptions.getSavePointPath(), BooleanUtils.toBoolean(launcherOptions.getAllowNonRestoredState()))); - } - - if(mode.equals(ClusterMode.yarnPer.name())){ - String flinkConfDir = launcherOptions.getFlinkconf(); - Configuration config = GlobalConfiguration.loadConfiguration(flinkConfDir); - JobGraph jobGraph = PackagedProgramUtils.createJobGraph(program, config, 1); - PerJobSubmitter.submit(launcherOptions, jobGraph); - } else { - ClusterClient clusterClient = ClusterClientFactory.createClusterClient(launcherOptions); - clusterClient.run(program, 1); - clusterClient.shutdown(); + }else{ + String pluginRoot = launcherOptions.getLocalSqlPluginPath(); + File jarFile = new File(getLocalCoreJarPath(pluginRoot)); + PackagedProgram program = new PackagedProgram(jarFile, 
Lists.newArrayList(), localArgs); + if(StringUtils.isNotBlank(launcherOptions.getSavePointPath())){ + program.setSavepointRestoreSettings(SavepointRestoreSettings.forPath(launcherOptions.getSavePointPath(), BooleanUtils.toBoolean(launcherOptions.getAllowNonRestoredState()))); + } + if(mode.equals(ClusterMode.yarnPer.name())){ + String flinkConfDir = launcherOptions.getFlinkconf(); + Configuration config = GlobalConfiguration.loadConfiguration(flinkConfDir); + JobGraph jobGraph = PackagedProgramUtils.createJobGraph(program, config, 1); + PerJobSubmitter.submit(launcherOptions, jobGraph); + } else { + ClusterClient clusterClient = ClusterClientFactory.createClusterClient(launcherOptions); + clusterClient.run(program, 1); + clusterClient.shutdown(); + System.exit(0); + } } - System.out.println("---submit end----"); } diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java b/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java index 3cdf3f8d4..7944142a2 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java +++ b/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java @@ -18,8 +18,9 @@ package com.dtstack.flink.sql.launcher.perjob; -import com.dtstack.flink.sql.launcher.LauncherOptions; +import com.dtstack.flink.sql.options.LauncherOptions; import com.dtstack.flink.sql.util.PluginUtil; +import org.apache.flink.api.common.cache.DistributedCache; import org.apache.flink.client.deployment.ClusterSpecification; import org.apache.flink.client.program.ClusterClient; import org.apache.flink.runtime.jobgraph.JobGraph; @@ -27,7 +28,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - +import java.net.MalformedURLException; +import java.net.URL; +import java.util.Map; import java.util.Properties; /** @@ -39,28 +42,33 @@ public class PerJobSubmitter { + private final static String 
CLASS_FILE_NAME_PRESTR = "class_path"; + private static final Logger LOG = LoggerFactory.getLogger(PerJobSubmitter.class); public static String submit(LauncherOptions launcherOptions, JobGraph jobGraph) throws Exception { - Properties confProperties = PluginUtil.jsonStrToObject(launcherOptions.getConfProp(), Properties.class); ClusterSpecification clusterSpecification = FLinkPerJobResourceUtil.createClusterSpecification(confProperties); - PerJobClusterClientBuilder perJobClusterClientBuilder = new PerJobClusterClientBuilder(); perJobClusterClientBuilder.init(launcherOptions.getYarnconf()); - String flinkJarPath = launcherOptions.getFlinkJarPath(); - AbstractYarnClusterDescriptor yarnClusterDescriptor = perJobClusterClientBuilder.createPerJobClusterDescriptor(confProperties, flinkJarPath, launcherOptions.getQueue()); - ClusterClient clusterClient = yarnClusterDescriptor.deployJobCluster(clusterSpecification, jobGraph,true); - + ClusterClient clusterClient = yarnClusterDescriptor.deployJobCluster(clusterSpecification,fillJobGraphClassPath(jobGraph),true); String applicationId = clusterClient.getClusterId().toString(); String flinkJobId = jobGraph.getJobID().toString(); - String tips = String.format("deploy per_job with appId: %s, jobId: %s", applicationId, flinkJobId); System.out.println(tips); LOG.info(tips); - return applicationId; } + + private static JobGraph fillJobGraphClassPath(JobGraph jobGraph) throws MalformedURLException { + Map jobCacheFileConfig = jobGraph.getUserArtifacts(); + for(Map.Entry tmp : jobCacheFileConfig.entrySet()){ + if(tmp.getKey().startsWith(CLASS_FILE_NAME_PRESTR)){ + jobGraph.getClasspaths().add(new URL("file:" + tmp.getValue().filePath)); + } + } + return jobGraph; + } } diff --git a/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java b/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java index e1a398d4c..d1a38ca5f 100644 --- 
a/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java +++ b/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java @@ -80,7 +80,7 @@ public MongoAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List cacheInfo = (Map) sideInput; Row row = new Row(sideInfo.getOutFieldInfoList().size()); for (Map.Entry entry : sideInfo.getInFieldIndex().entrySet()) { @@ -152,7 +152,6 @@ public void flatMap(Row value, Collector out) throws Exception { } for (Map one : cacheList) { - System.out.println(fillData(value, one)); out.collect(fillData(value, one)); } } diff --git a/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java b/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java index d376bcdde..226885ed7 100644 --- a/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java +++ b/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java @@ -130,6 +130,7 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except Object equalObj = input.getField(conValIndex); if (equalObj == null) { resultFuture.complete(null); + return; } basicDBObject.put(sideInfo.getEqualFieldList().get(i), equalObj); } @@ -142,12 +143,12 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except dealMissKey(input, resultFuture); return; } else if (ECacheContentType.MultiLine == val.getType()) { - + List rowList = Lists.newArrayList(); for (Object jsonArray : (List) val.getContent()) { Row row = fillData(input, jsonArray); - resultFuture.complete(Collections.singleton(row)); + rowList.add(row); } - + resultFuture.complete(rowList); } else { throw new RuntimeException("not support cache obj type " + val.getType()); } diff --git 
a/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java b/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java index 6d4d86d23..d045729e8 100644 --- a/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java +++ b/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java @@ -23,7 +23,6 @@ import com.dtstack.flink.sql.sink.IStreamSinkGener; import com.dtstack.flink.sql.sink.rdb.RdbSink; import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; - import java.util.List; import java.util.Map; diff --git a/oracle/oracle-side/oracle-all-side/pom.xml b/oracle/oracle-side/oracle-all-side/pom.xml new file mode 100644 index 000000000..458077376 --- /dev/null +++ b/oracle/oracle-side/oracle-all-side/pom.xml @@ -0,0 +1,91 @@ + + + + sql.side.oracle + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.all.oracle + 1.0-SNAPSHOT + oracle-all-side + jar + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.oracle.core + ${sql.side.oracle.core.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java new file mode 100644 index 000000000..b6779529e --- /dev/null +++ b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.side.oracle; + + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.all.RdbAllReqRow; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.util.List; +import java.util.Map; + +/** + * side operator with cache for all(period reload) + */ +public class OracleAllReqRow extends RdbAllReqRow { + + private static final Logger LOG = LoggerFactory.getLogger(OracleAllReqRow.class); + + private static final String ORACLE_DRIVER = "oracle.jdbc.driver.OracleDriver"; + + public OracleAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } + + @Override + public Connection getConn(String dbURL, String userName, String password) { + try { + Class.forName(ORACLE_DRIVER); + Map addParams = Maps.newHashMap(); + String targetDbUrl = DtStringUtil.addJdbcParam(dbURL, addParams, true); + return DriverManager.getConnection(targetDbUrl, userName, password); + } catch (Exception e) { + LOG.error("", e); + throw new RuntimeException("", e); + } + } + +} diff --git a/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java 
b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java new file mode 100644 index 000000000..4063bf9bb --- /dev/null +++ b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java @@ -0,0 +1,33 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.side.oracle; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.all.RdbAllSideInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; + +import java.util.List; + +public class OracleAllSideInfo extends RdbAllSideInfo { + + public OracleAllSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } +} diff --git a/oracle/oracle-side/oracle-async-side/pom.xml b/oracle/oracle-side/oracle-async-side/pom.xml new file mode 100644 index 000000000..3a4b36559 --- /dev/null +++ b/oracle/oracle-side/oracle-async-side/pom.xml @@ -0,0 +1,92 @@ + + + + sql.side.oracle + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.side.async.oracle + 1.0-SNAPSHOT + oracle-async-side + jar + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.oracle.core + ${sql.side.oracle.core.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java new file mode 100644 index 000000000..c68850d21 --- 
/dev/null +++ b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.side.oracle; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.async.RdbAsyncReqRow; +import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; +import io.vertx.core.Vertx; +import io.vertx.core.VertxOptions; +import io.vertx.core.json.JsonObject; +import io.vertx.ext.jdbc.JDBCClient; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.configuration.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + + +public class OracleAsyncReqRow extends RdbAsyncReqRow { + + private static final Logger LOG = LoggerFactory.getLogger(OracleAsyncReqRow.class); + + private static final String ORACLE_DRIVER = "oracle.jdbc.driver.OracleDriver"; + + public OracleAsyncReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, 
outFieldInfoList, sideTableInfo); + } + + @Override + public void open(Configuration parameters) throws Exception { + super.open(parameters); + JsonObject sqlserverClientConfig = new JsonObject(); + RdbSideTableInfo rdbSideTableInfo = (RdbSideTableInfo) sideInfo.getSideTableInfo(); + sqlserverClientConfig.put("url", rdbSideTableInfo.getUrl()) + .put("driver_class", ORACLE_DRIVER) + .put("max_pool_size", DEFAULT_MAX_DB_CONN_POOL_SIZE) + .put("user", rdbSideTableInfo.getUserName()) + .put("password", rdbSideTableInfo.getPassword()); + + VertxOptions vo = new VertxOptions(); + vo.setEventLoopPoolSize(DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE); + vo.setWorkerPoolSize(DEFAULT_VERTX_WORKER_POOL_SIZE); + Vertx vertx = Vertx.vertx(vo); + setRdbSQLClient(JDBCClient.createNonShared(vertx, sqlserverClientConfig)); + } +} diff --git a/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java new file mode 100644 index 000000000..7ff63f3d9 --- /dev/null +++ b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.side.oracle; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.async.RdbAsyncSideInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; + +import java.util.List; + + +public class OracleAsyncSideInfo extends RdbAsyncSideInfo { + + public OracleAsyncSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } +} diff --git a/oracle/oracle-side/oracle-side-core/pom.xml b/oracle/oracle-side/oracle-side-core/pom.xml new file mode 100644 index 000000000..cc954d9fd --- /dev/null +++ b/oracle/oracle-side/oracle-side-core/pom.xml @@ -0,0 +1,18 @@ + + + + sql.side.oracle + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.oracle.core + 1.0-SNAPSHOT + jar + oracle-side-core + + \ No newline at end of file diff --git a/oracle/oracle-side/oracle-side-core/src/main/java/com/dtstack/flink/sql/side/oracle/table/OracleSideParser.java b/oracle/oracle-side/oracle-side-core/src/main/java/com/dtstack/flink/sql/side/oracle/table/OracleSideParser.java new file mode 100644 index 000000000..f9124bd10 --- /dev/null +++ b/oracle/oracle-side/oracle-side-core/src/main/java/com/dtstack/flink/sql/side/oracle/table/OracleSideParser.java @@ -0,0 +1,35 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.side.oracle.table; + +import com.dtstack.flink.sql.side.rdb.table.RdbSideParser; +import com.dtstack.flink.sql.table.TableInfo; + +import java.util.Map; + + +public class OracleSideParser extends RdbSideParser { + private static final String CURR_TYPE = "oracle"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + TableInfo oracleTableInfo = super.getTableInfo(tableName, fieldsInfo, props); + oracleTableInfo.setType(CURR_TYPE); + return oracleTableInfo; + } +} diff --git a/oracle/oracle-side/pom.xml b/oracle/oracle-side/pom.xml new file mode 100644 index 000000000..e0ffe547b --- /dev/null +++ b/oracle/oracle-side/pom.xml @@ -0,0 +1,33 @@ + + + + sql.oracle + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.oracle + 1.0-SNAPSHOT + + oracle-side-core + oracle-all-side + oracle-async-side + + pom + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.rdb + ${rdb.side.version} + + + \ No newline at end of file diff --git a/oracle/oracle-sink/pom.xml b/oracle/oracle-sink/pom.xml new file mode 100644 index 000000000..c5807f806 --- /dev/null +++ b/oracle/oracle-sink/pom.xml @@ -0,0 +1,90 @@ + + + + sql.oracle + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.oracle + 1.0-SNAPSHOT + oracle-sink + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.sink.rdb + ${sql.sink.rdb.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run 
+ + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java new file mode 100644 index 000000000..2ca020c54 --- /dev/null +++ b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.sink.oracle; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.rdb.RdbSink; +import com.dtstack.flink.sql.sink.rdb.format.ExtendOutputFormat; +import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; +import org.apache.commons.lang3.StringUtils; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Reason: + * Date: 2018/11/27 + * Company: www.dtstack.com + * + * @author maqi + */ +public class OracleSink extends RdbSink implements IStreamSinkGener { + private static final String ORACLE_DRIVER = "oracle.jdbc.driver.OracleDriver"; + + @Override + public String getDriverName() { + return ORACLE_DRIVER; + } + + @Override + public RetractJDBCOutputFormat getOutputFormat() { + return new ExtendOutputFormat(); + } + + @Override + public void buildSql(String tableName, List fields) { + buildInsertSql(tableName, fields); + } + + private void buildInsertSql(String tableName, List fields) { + String sqlTmp = "insert into " + tableName + " (${fields}) values (${placeholder})"; + String fieldsStr = StringUtils.join(fields, ","); + String placeholder = ""; + + for (String fieldName : fields) { + placeholder += ",?"; + } + placeholder = placeholder.replaceFirst(",", ""); + sqlTmp = sqlTmp.replace("${fields}", fieldsStr).replace("${placeholder}", placeholder); + this.sql = sqlTmp; + } + + @Override + public String buildUpdateSql(String tableName, List fieldNames, Map> realIndexes, List fullField) { + return "MERGE INTO " + tableName + " T1 USING " + + "(" + makeValues(fieldNames) + ") T2 ON (" + + 
updateKeySql(realIndexes) + ") WHEN MATCHED THEN UPDATE SET " + + getUpdateSql(fieldNames, fullField, "T1", "T2", keyColList(realIndexes)) + " WHEN NOT MATCHED THEN " + + "INSERT (" + quoteColumns(fieldNames) + ") VALUES (" + + quoteColumns(fieldNames, "T2") + ")"; + } + + + public String quoteColumns(List column) { + return quoteColumns(column, null); + } + + public String quoteColumns(List column, String table) { + String prefix = StringUtils.isBlank(table) ? "" : quoteTable(table) + "."; + List list = new ArrayList<>(); + for (String col : column) { + list.add(prefix + quoteColumn(col)); + } + return StringUtils.join(list, ","); + } + + protected List keyColList(Map> updateKey) { + List keyCols = new ArrayList<>(); + for (Map.Entry> entry : updateKey.entrySet()) { + List list = entry.getValue(); + for (String col : list) { + if (!containsIgnoreCase(keyCols,col)) { + keyCols.add(col); + } + } + } + return keyCols; + } + + public String getUpdateSql(List column, List fullColumn, String leftTable, String rightTable, List keyCols) { + String prefixLeft = StringUtils.isBlank(leftTable) ? "" : quoteTable(leftTable) + "."; + String prefixRight = StringUtils.isBlank(rightTable) ? 
"" : quoteTable(rightTable) + "."; + List list = new ArrayList<>(); + for (String col : fullColumn) { + if (keyCols == null || keyCols.size() == 0 || containsIgnoreCase(keyCols,col)) { + continue; + } + if (fullColumn == null ||containsIgnoreCase(column,col)) { + list.add(prefixLeft + col + "=" + prefixRight + col); + } else { + list.add(prefixLeft + col + "=null"); + } + } + return StringUtils.join(list, ","); + } + + public String quoteTable(String table) { + String[] parts = table.split("\\."); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < parts.length; ++i) { + if (i != 0) { + sb.append("."); + } + sb.append(getStartQuote() + parts[i] + getEndQuote()); + } + return sb.toString(); + } + + + public String updateKeySql(Map> updateKey) { + List exprList = new ArrayList<>(); + for (Map.Entry> entry : updateKey.entrySet()) { + List colList = new ArrayList<>(); + for (String col : entry.getValue()) { + colList.add("T1." + quoteColumn(col) + "=T2." + quoteColumn(col)); + } + exprList.add(StringUtils.join(colList, " AND ")); + } + return StringUtils.join(exprList, " OR "); + } + + + public String makeValues(List column) { + StringBuilder sb = new StringBuilder("SELECT "); + for (int i = 0; i < column.size(); ++i) { + if (i != 0) { + sb.append(","); + } + sb.append("? 
" + quoteColumn(column.get(i))); + } + sb.append(" FROM DUAL"); + return sb.toString(); + } + + public boolean containsIgnoreCase(List l, String s) { + Iterator it = l.iterator(); + while (it.hasNext()) { + if (it.next().equalsIgnoreCase(s)) + return true; + } + return false; + } + + public String quoteColumn(String column) { + return getStartQuote() + column + getEndQuote(); + } + + public String getStartQuote() { + return "\""; + } + + public String getEndQuote() { + return "\""; + } + + +} diff --git a/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java new file mode 100644 index 000000000..6db2c9c06 --- /dev/null +++ b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java @@ -0,0 +1,41 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.sink.oracle.table; + +import com.dtstack.flink.sql.sink.rdb.table.RdbSinkParser; +import com.dtstack.flink.sql.table.TableInfo; + +import java.util.Map; + +/** + * Reason: + * Date: 2018/11/27 + * Company: www.dtstack.com + * + * @author maqi + */ +public class OracleSinkParser extends RdbSinkParser { + private static final String CURR_TYPE = "oracle"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + TableInfo sqlserverTableInfo = super.getTableInfo(tableName, fieldsInfo, props); + sqlserverTableInfo.setType(CURR_TYPE); + return sqlserverTableInfo; + } +} diff --git a/oracle/pom.xml b/oracle/pom.xml new file mode 100644 index 000000000..60565a149 --- /dev/null +++ b/oracle/pom.xml @@ -0,0 +1,39 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.oracle + 1.0-SNAPSHOT + + oracle-side + oracle-sink + + pom + + + 12.2.0.1 + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.core + ${sql.core.version} + provided + + + + com.github.noraui + ojdbc8 + ${ojdbc.version} + + + \ No newline at end of file diff --git a/pom.xml b/pom.xml index bde73d6df..c3ad4d24b 100644 --- a/pom.xml +++ b/pom.xml @@ -5,6 +5,9 @@ com.dtstack.flink flink.sql 1.0-SNAPSHOT + pom + flink.sql + http://maven.apache.org core kafka09 @@ -18,16 +21,18 @@ launcher rdb sqlserver + oracle + cassandra + kafka08 + serversocket + console - pom - flink.sql - http://maven.apache.org - - UTF-8 - 1.5.4 - + + UTF-8 + 1.6.2 + diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java 
b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java index 54ea47a18..abae0cbb2 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java @@ -23,8 +23,11 @@ import com.dtstack.flink.sql.side.JoinInfo; import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; +import com.dtstack.flink.sql.side.rdb.util.MathUtil; +import com.dtstack.flink.sql.side.rdb.util.SwitchUtil; import org.apache.calcite.sql.JoinType; import org.apache.commons.collections.CollectionUtils; +import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; @@ -34,6 +37,7 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import java.math.BigDecimal; import java.sql.*; import java.util.Calendar; import java.util.List; @@ -58,12 +62,13 @@ public abstract class RdbAllReqRow extends AllReqRow { private AtomicReference>>> cacheRef = new AtomicReference<>(); + public RdbAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { super(new RdbAllSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); } @Override - protected Row fillData(Row input, Object sideInput) { + public Row fillData(Row input, Object sideInput) { Map cacheInfo = (Map) sideInput; Row row = new Row(sideInfo.getOutFieldInfoList().size()); for (Map.Entry entry : sideInfo.getInFieldIndex().entrySet()) { @@ -74,6 +79,7 @@ protected Row fillData(Row input, Object sideInput) { if (obj instanceof Timestamp && isTimeIndicatorTypeInfo) { obj = ((Timestamp) obj).getTime(); } + row.setField(entry.getKey(), obj); } @@ -194,10 +200,14 @@ private void loadData(Map>> tmpCache) throws SQ 
statement.setFetchSize(getFetchSize()); ResultSet resultSet = statement.executeQuery(sql); String[] sideFieldNames = sideInfo.getSideSelectFields().split(","); + String[] fields = sideInfo.getSideTableInfo().getFieldTypes(); while (resultSet.next()) { Map oneRow = Maps.newHashMap(); for (String fieldName : sideFieldNames) { - oneRow.put(fieldName.trim(), resultSet.getObject(fieldName.trim())); + Object object = resultSet.getObject(fieldName.trim()); + int fieldIndex = sideInfo.getSideTableInfo().getFieldList().indexOf(fieldName.trim()); + object = SwitchUtil.getTarget(object, fields[fieldIndex]); + oneRow.put(fieldName.trim(), object); } String cacheKey = buildKey(oneRow, sideInfo.getEqualFieldList()); @@ -213,7 +223,7 @@ private void loadData(Map>> tmpCache) throws SQ } } - public int getFetchSize(){ + public int getFetchSize() { return 1000; } diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java index a7e5339e9..60b6f8e1c 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java @@ -22,12 +22,12 @@ import com.dtstack.flink.sql.enums.ECacheContentType; import com.dtstack.flink.sql.side.*; import com.dtstack.flink.sql.side.cache.CacheObj; +import com.dtstack.flink.sql.side.rdb.util.SwitchUtil; import io.vertx.core.json.JsonArray; import io.vertx.ext.sql.SQLClient; import io.vertx.ext.sql.SQLConnection; import org.apache.flink.api.java.typeutils.RowTypeInfo; import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; @@ -35,7 +35,6 @@ import org.slf4j.LoggerFactory; import 
java.sql.Timestamp; -import java.util.Collections; import java.util.List; import java.util.Map; @@ -72,8 +71,8 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except Object equalObj = input.getField(conValIndex); if (equalObj == null) { resultFuture.complete(null); + return; } - inputParams.add(equalObj); } @@ -86,12 +85,12 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except dealMissKey(input, resultFuture); return; } else if (ECacheContentType.MultiLine == val.getType()) { - + List rowList = Lists.newArrayList(); for (Object jsonArray : (List) val.getContent()) { Row row = fillData(input, jsonArray); - resultFuture.complete(Collections.singleton(row)); + rowList.add(row); } - + resultFuture.complete(rowList); } else { throw new RuntimeException("not support cache obj type " + val.getType()); } @@ -119,17 +118,20 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except int resultSize = rs.result().getResults().size(); if (resultSize > 0) { + List rowList = Lists.newArrayList(); + for (JsonArray line : rs.result().getResults()) { Row row = fillData(input, line); if (openCache()) { cacheContent.add(line); } - resultFuture.complete(Collections.singleton(row)); + rowList.add(row); } - if (openCache()) { putCache(key, CacheObj.buildCacheObj(ECacheContentType.MultiLine, cacheContent)); } + + resultFuture.complete(rowList); } else { dealMissKey(input, resultFuture); if (openCache()) { @@ -151,10 +153,10 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except public Row fillData(Row input, Object line) { JsonArray jsonArray = (JsonArray) line; Row row = new Row(sideInfo.getOutFieldInfoList().size()); + String[] fields = sideInfo.getSideTableInfo().getFieldTypes(); for (Map.Entry entry : sideInfo.getInFieldIndex().entrySet()) { Object obj = input.getField(entry.getValue()); boolean isTimeIndicatorTypeInfo = 
TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass()); - if (obj instanceof Timestamp && isTimeIndicatorTypeInfo) { obj = ((Timestamp) obj).getTime(); } @@ -166,7 +168,8 @@ public Row fillData(Row input, Object line) { if (jsonArray == null) { row.setField(entry.getKey(), null); } else { - row.setField(entry.getKey(), jsonArray.getValue(entry.getValue())); + Object object = SwitchUtil.getTarget(jsonArray.getValue(entry.getValue()), fields[entry.getValue()]); + row.setField(entry.getKey(), object); } } diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java index b7ff94ea9..f652bf342 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java @@ -42,6 +42,8 @@ public class RdbAsyncSideInfo extends SideInfo { + private static final long serialVersionUID = 1942629132469918611L; + public RdbAsyncSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); } diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java index 82d013a98..e007cecb0 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java @@ -47,7 +47,7 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map + * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.side.rdb.util; + +import java.text.ParseException; + +/** + * Reason: + * Date: 2018/12/3 + * Company: www.dtstack.com + * + * @author maqi + */ +public class SwitchUtil { + public static Object getTarget(Object obj, String targetType) { + targetType = targetType.toLowerCase(); + switch (targetType) { + case "int": + case "integer": + return MathUtil.getIntegerVal(obj); + case "bigint": + return MathUtil.getLongVal(obj); + case "boolean": + return MathUtil.getBoolean(obj); + case "tinyint": + return MathUtil.getByte(obj); + case "smallint": + return MathUtil.getShort(obj); + case "varchar": + return MathUtil.getString(obj); + case "real": + case "float": + return MathUtil.getFloatVal(obj); + case "double": + return MathUtil.getDoubleVal(obj); + case "decimal": + return MathUtil.getBigDecimal(obj); + case "date": + return MathUtil.getDate(obj); + case "timestamp": + return MathUtil.getTimestamp(obj); + } + return obj; + } +} diff --git a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/RdbSink.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/RdbSink.java index d587639aa..5ee2c44c4 100644 --- a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/RdbSink.java +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/RdbSink.java @@ -35,6 +35,7 @@ import java.io.Serializable; import java.math.BigDecimal; +import java.sql.Date; import java.sql.Timestamp; import java.sql.Types; import java.util.Arrays; @@ -99,8 +100,6 @@ public RichSinkFunction createJdbcSinkFunc() { return outputFormatSinkFunc; } - public abstract RetractJDBCOutputFormat getOutputFormat(); - @Override public 
RdbSink genStreamSink(TargetTableInfo targetTableInfo) { @@ -151,7 +150,9 @@ protected void buildSqlTypes(List fieldTypeArray) { String fieldType = fieldTypeArray.get(i).getName(); if (fieldType.equals(Integer.class.getName())) { tmpFieldsType[i] = Types.INTEGER; - } else if (fieldType.equals(Long.class.getName())) { + }else if (fieldType.equals(Boolean.class.getName())) { + tmpFieldsType[i] = Types.BOOLEAN; + }else if (fieldType.equals(Long.class.getName())) { tmpFieldsType[i] = Types.BIGINT; } else if (fieldType.equals(Byte.class.getName())) { tmpFieldsType[i] = Types.TINYINT; @@ -169,6 +170,8 @@ protected void buildSqlTypes(List fieldTypeArray) { tmpFieldsType[i] = Types.TIMESTAMP; } else if (fieldType.equals(BigDecimal.class.getName())) { tmpFieldsType[i] = Types.DECIMAL; + } else if (fieldType.equals(Date.class.getName())) { + tmpFieldsType[i] = Types.DATE; } else { throw new RuntimeException("no support field type for sql. the input type:" + fieldType); } @@ -252,4 +255,7 @@ public void setDbType(String dbType) { public abstract String buildUpdateSql(String tableName, List fieldNames, Map> realIndexes, List fullField); public abstract String getDriverName(); + + public abstract RetractJDBCOutputFormat getOutputFormat(); + } diff --git a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java new file mode 100644 index 000000000..7c14807d2 --- /dev/null +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.rdb.format; + +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; + +import java.sql.ResultSet; +import java.sql.SQLException; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +/** + * Reason: + * Date: 2018/11/30 + * Company: www.dtstack.com + * + * @author maqi + */ +public class ExtendOutputFormat extends RetractJDBCOutputFormat { + + + @Override + public boolean isReplaceInsertQuery() throws SQLException { + fillRealIndexes(); + fillFullColumns(); + + if (!getRealIndexes().isEmpty()) { + for (List value : getRealIndexes().values()) { + for (String fieldName : getDbSink().getFieldNames()) { + if (containsIgnoreCase(value, fieldName)) { + return true; + } + } + } + } + return false; + } + + /** + * get db all index + * + * @throws SQLException + */ + public void fillRealIndexes() throws SQLException { + Map> map = Maps.newHashMap(); + ResultSet rs = getDbConn().getMetaData().getIndexInfo(null, null, getTableName(), true, false); + + while (rs.next()) { + String indexName = rs.getString("INDEX_NAME"); + if (!map.containsKey(indexName)) { + map.put(indexName, new ArrayList<>()); + } + String column_name = rs.getString("COLUMN_NAME"); + if (StringUtils.isNotBlank(column_name)) { + column_name = column_name; + } + 
map.get(indexName).add(column_name); + } + + for (Map.Entry> entry : map.entrySet()) { + String k = entry.getKey(); + List v = entry.getValue(); + if (v != null && v.size() != 0 && v.get(0) != null) { + getRealIndexes().put(k, v); + } + } + } + + /** + * get db all column name + * + * @throws SQLException + */ + public void fillFullColumns() throws SQLException { + ResultSet rs = getDbConn().getMetaData().getColumns(null, null, getTableName(), null); + while (rs.next()) { + String columnName = rs.getString("COLUMN_NAME"); + if (StringUtils.isNotBlank(columnName)) { + getFullField().add(columnName); + } + } + } + + public boolean containsIgnoreCase(List l, String s) { + Iterator it = l.iterator(); + while (it.hasNext()) { + if (it.next().equalsIgnoreCase(s)) + return true; + } + return false; + } + +} diff --git a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java index 8f5329796..382b687e1 100644 --- a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java @@ -28,7 +28,6 @@ import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import java.sql.*; import java.util.*; import java.io.IOException; diff --git a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java index a14825a67..0c248997b 100644 --- a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java @@ -46,6 +46,7 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map>> cacheRef = new AtomicReference<>(); + private RedisSideReqRow redisSideReqRow; + public 
RedisAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { super(new RedisAllSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + this.redisSideReqRow = new RedisSideReqRow(super.sideInfo); } @Override - protected Row fillData(Row input, Object sideInput) { - Map sideInputMap = (Map) sideInput; - Row row = new Row(sideInfo.getOutFieldInfoList().size()); - for(Map.Entry entry : sideInfo.getInFieldIndex().entrySet()){ - Object obj = input.getField(entry.getValue()); - boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass()); - - if(obj instanceof Timestamp && isTimeIndicatorTypeInfo){ - obj = ((Timestamp)obj).getTime(); - } - row.setField(entry.getKey(), obj); - } - - for(Map.Entry entry : sideInfo.getSideFieldIndex().entrySet()){ - if(sideInputMap == null){ - row.setField(entry.getKey(), null); - }else{ - String key = sideInfo.getSideFieldNameIndex().get(entry.getKey()); - row.setField(entry.getKey(), sideInputMap.get(key)); - } - } - - return row; + public Row fillData(Row input, Object sideInput) { + return redisSideReqRow.fillData(input, sideInput); } @Override diff --git a/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java b/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java index 82532c527..251107287 100644 --- a/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java +++ b/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java @@ -21,6 +21,7 @@ import com.dtstack.flink.sql.enums.ECacheContentType; import com.dtstack.flink.sql.side.*; import com.dtstack.flink.sql.side.cache.CacheObj; +import com.dtstack.flink.sql.side.redis.table.RedisSideReqRow; import 
com.dtstack.flink.sql.side.redis.table.RedisSideTableInfo; import io.lettuce.core.KeyValue; import io.lettuce.core.RedisClient; @@ -60,9 +61,11 @@ public class RedisAsyncReqRow extends AsyncReqRow { private RedisSideTableInfo redisSideTableInfo; + private RedisSideReqRow redisSideReqRow; public RedisAsyncReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { super(new RedisAsyncSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + redisSideReqRow = new RedisSideReqRow(super.sideInfo); } @Override @@ -77,6 +80,8 @@ private void buildRedisClient(RedisSideTableInfo tableInfo){ String password = redisSideTableInfo.getPassword(); if (password != null){ password = password + "@"; + } else { + password = ""; } String database = redisSideTableInfo.getDatabase(); if (database == null){ @@ -108,30 +113,8 @@ private void buildRedisClient(RedisSideTableInfo tableInfo){ } @Override - protected Row fillData(Row input, Object sideInput) { - Map keyValue = (Map) sideInput; - Row row = new Row(sideInfo.getOutFieldInfoList().size()); - for(Map.Entry entry : sideInfo.getInFieldIndex().entrySet()){ - Object obj = input.getField(entry.getValue()); - boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass()); - - if(obj instanceof Timestamp && isTimeIndicatorTypeInfo){ - obj = ((Timestamp)obj).getTime(); - } - - row.setField(entry.getKey(), obj); - } - - for(Map.Entry entry : sideInfo.getSideFieldIndex().entrySet()){ - if(keyValue == null){ - row.setField(entry.getKey(), null); - }else{ - String key = sideInfo.getSideFieldNameIndex().get(entry.getKey()); - row.setField(entry.getKey(), keyValue.get(key)); - } - } - - return row; + public Row fillData(Row input, Object sideInput) { + return redisSideReqRow.fillData(input, sideInput); } @Override @@ -158,8 +141,12 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except 
dealMissKey(input, resultFuture); return; }else if(ECacheContentType.MultiLine == val.getType()){ - Row row = fillData(input, val.getContent()); - resultFuture.complete(Collections.singleton(row)); + List rowList = Lists.newArrayList(); + for (Object jsonArray : (List) val.getContent()) { + Row row = fillData(input, val.getContent()); + rowList.add(row); + } + resultFuture.complete(rowList); }else{ throw new RuntimeException("not support cache obj type " + val.getType()); } diff --git a/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java b/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java new file mode 100644 index 000000000..40dae6c68 --- /dev/null +++ b/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.side.redis.table; + +import com.dtstack.flink.sql.side.ISideReqRow; +import com.dtstack.flink.sql.side.SideInfo; +import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; +import org.apache.flink.types.Row; + +import java.io.Serializable; +import java.sql.Timestamp; +import java.util.Map; + +/** + * redis fill row data + * Date: 2018/12/4 + * Company: www.dtstack.com + * @author xuchao + */ + +public class RedisSideReqRow implements ISideReqRow, Serializable { + + private static final long serialVersionUID = 3751171828444748982L; + + private SideInfo sideInfo; + + public RedisSideReqRow(SideInfo sideInfo){ + this.sideInfo = sideInfo; + } + + @Override + public Row fillData(Row input, Object sideInput) { + Map sideInputMap = (Map) sideInput; + Row row = new Row(sideInfo.getOutFieldInfoList().size()); + for(Map.Entry entry : sideInfo.getInFieldIndex().entrySet()){ + Object obj = input.getField(entry.getValue()); + boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass()); + + if(obj instanceof Timestamp && isTimeIndicatorTypeInfo){ + obj = ((Timestamp)obj).getTime(); + } + row.setField(entry.getKey(), obj); + } + + for(Map.Entry entry : sideInfo.getSideFieldIndex().entrySet()){ + if(sideInputMap == null){ + row.setField(entry.getKey(), null); + }else{ + String key = sideInfo.getSideFieldNameIndex().get(entry.getKey()); + row.setField(entry.getKey(), sideInputMap.get(key)); + } + } + + return row; + } +} diff --git a/redis5/redis5-sink/pom.xml b/redis5/redis5-sink/pom.xml index 994c3972a..2919d8685 100644 --- a/redis5/redis5-sink/pom.xml +++ b/redis5/redis5-sink/pom.xml @@ -26,7 +26,7 @@ redis.clients jedis - 2.8.0 + 2.9.0 diff --git a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java index 910882c9f..962054f24 
100644 --- a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java +++ b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java @@ -108,6 +108,10 @@ private void establishConnection() { if (timeout == 0){ timeout = 10000; } + if (database == null) + { + database = "0"; + } switch (redisType){ //单机 @@ -122,7 +126,7 @@ private void establishConnection() { break; //集群 case 3: - jedis = new JedisCluster(addresses, timeout, timeout,1, poolConfig); + jedis = new JedisCluster(addresses, timeout, timeout,10, password, poolConfig); } } @@ -134,7 +138,7 @@ public void writeRecord(Tuple2 record) throws IOException { return; } Row row = tupleTrans.getField(1); - if (record.getArity() != fieldNames.length) { + if (row.getArity() != fieldNames.length) { return; } @@ -162,7 +166,7 @@ public void writeRecord(Tuple2 record) throws IOException { for (int i = 0; i < fieldNames.length; i++) { StringBuilder key = new StringBuilder(); key.append(tableName).append(":").append(perKey).append(":").append(fieldNames[i]); - jedis.set(key.toString(), (String) row.getField(i)); + jedis.set(key.toString(), row.getField(i).toString()); } outRecords.inc(); } diff --git a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java index 6960dc249..c114b946e 100644 --- a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java +++ b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java @@ -33,7 +33,7 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.serversocket + 1.0-SNAPSHOT + + serversocket-source + + pom + + + 3.8.1 + 1.0-SNAPSHOT + + + + + junit + junit + ${junit.version} + test + + + + com.dtstack.flink + sql.core + 
${sql.core.version} + provided + + + \ No newline at end of file diff --git a/serversocket/serversocket-source/pom.xml b/serversocket/serversocket-source/pom.xml new file mode 100644 index 000000000..4df2c3849 --- /dev/null +++ b/serversocket/serversocket-source/pom.xml @@ -0,0 +1,83 @@ + + + + sql.serversocket + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.source.serversocket + 1.0-SNAPSHOT + jar + + serversocket-source + http://maven.apache.org + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java new file mode 100644 index 000000000..ce9a44895 --- /dev/null +++ b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java @@ -0,0 +1,183 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.source.serversocket; + +import com.dtstack.flink.sql.source.serversocket.table.ServersocketSourceTableInfo; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.streaming.api.functions.source.SourceFunction; +import org.apache.flink.types.Row; +import org.apache.flink.util.IOUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.util.Iterator; + + +/** + * Reason: + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author maqi + */ +public class CustomerSocketTextStreamFunction implements SourceFunction { + private static final Logger LOG = LoggerFactory.getLogger(CustomerSocketTextStreamFunction.class); + + /** + * Default delay between successive connection attempts. + */ + private static final int DEFAULT_CONNECTION_RETRY_SLEEP = 2000; + + /** + * Default connection timeout when connecting to the server socket (infinite). + */ + private static final int CONNECTION_TIMEOUT_TIME = 0; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** + * Type information describing the result type. + */ + private final TypeInformation typeInfo; + + /** + * Field names to parse. Indices match fieldTypes indices. 
+ */ + private final String[] fieldNames; + + /** + * Types to parse fields as. Indices match fieldNames indices. + */ + private final TypeInformation[] fieldTypes; + + private volatile boolean isRunning = true; + + private transient Socket currentSocket; + + ServersocketSourceTableInfo tableInfo; + + public CustomerSocketTextStreamFunction(ServersocketSourceTableInfo tableInfo, TypeInformation typeInfo) { + this.typeInfo = typeInfo; + + this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames(); + + this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes(); + + this.tableInfo = tableInfo; + } + + @Override + public void run(SourceContext ctx) throws Exception { + final StringBuilder buffer = new StringBuilder(); + long attempt = 0; + + while (isRunning) { + try { + Socket socket = new Socket(); + currentSocket = socket; + socket.connect(new InetSocketAddress(tableInfo.getHostname(), tableInfo.getPort()), CONNECTION_TIMEOUT_TIME); + + BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream())); + char[] cbuf = new char[8192]; + int bytesRead; + while (isRunning && (bytesRead = reader.read(cbuf)) != -1) { + buffer.append(cbuf, 0, bytesRead); + int delimPos; + String delimiter = tableInfo.getDelimiter(); + while (buffer.length() >= delimiter.length() && (delimPos = buffer.indexOf(delimiter)) != -1) { + String record = buffer.substring(0, delimPos); + // truncate trailing carriage return + if (delimiter.equals("\n") && record.endsWith("\r")) { + record = record.substring(0, record.length() - 1); + } + ctx.collect(convertToRow(record)); + buffer.delete(0, delimPos + delimiter.length()); + } + } + } catch (Exception e) { + LOG.info("Connection server failed, Please check configuration !!!!!!!!!!!!!!!!"); + } + + + // if we dropped out of this loop due to an EOF, sleep and retry + if (isRunning) { + attempt++; + if (tableInfo.getMaxNumRetries() == -1 || attempt < tableInfo.getMaxNumRetries()) { + 
Thread.sleep(DEFAULT_CONNECTION_RETRY_SLEEP); + } else { + // this should probably be here, but some examples expect simple exists of the stream source + // throw new EOFException("Reached end of stream and reconnects are not enabled."); + break; + } + } + } + + // collect trailing data + if (buffer.length() > 0) { + ctx.collect(convertToRow(buffer.toString())); + } + } + + public Row convertToRow(String record) throws IOException { + JsonNode root = objectMapper.readTree(record); + Row row = new Row(fieldNames.length); + for (int i = 0; i < fieldNames.length; i++) { + JsonNode node = getIgnoreCase(root, fieldNames[i]); + if (node == null) { + row.setField(i, null); + } else { + // Read the value as specified type + Object value = objectMapper.treeToValue(node, fieldTypes[i].getTypeClass()); + row.setField(i, value); + } + } + return row; + } + + + @Override + public void cancel() { + isRunning = false; + + // we need to close the socket as well, because the Thread.interrupt() function will + // not wake the thread in the socketStream.read() method when blocked. + Socket theSocket = this.currentSocket; + if (theSocket != null) { + IOUtils.closeSocket(theSocket); + } + } + + public JsonNode getIgnoreCase(JsonNode jsonNode, String key) { + Iterator iter = jsonNode.fieldNames(); + while (iter.hasNext()) { + String key1 = iter.next(); + if (key1.equalsIgnoreCase(key)) { + return jsonNode.get(key1); + } + } + return null; + } +} diff --git a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/ServersocketSource.java b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/ServersocketSource.java new file mode 100644 index 000000000..3a67d2c98 --- /dev/null +++ b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/ServersocketSource.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.dtstack.flink.sql.source.serversocket; + +import com.dtstack.flink.sql.source.IStreamSourceGener; +import com.dtstack.flink.sql.source.serversocket.table.ServersocketSourceTableInfo; +import com.dtstack.flink.sql.table.SourceTableInfo; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.api.functions.source.SocketTextStreamFunction; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.java.StreamTableEnvironment; +import org.apache.flink.types.Row; + +/** + * Reason: + * Date: 2018/12/18 + * Company: www.dtstack.com + * + * @author maqi + */ +public class ServersocketSource implements IStreamSourceGener

{ + @Override + public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) { + ServersocketSourceTableInfo serversocketSourceTableInfo = (ServersocketSourceTableInfo) sourceTableInfo; + + String tableName = serversocketSourceTableInfo.getName(); + + TypeInformation[] types = new TypeInformation[serversocketSourceTableInfo.getFields().length]; + for (int i = 0; i < serversocketSourceTableInfo.getFieldClasses().length; i++) { + types[i] = TypeInformation.of(serversocketSourceTableInfo.getFieldClasses()[i]); + } + + TypeInformation typeInformation = new RowTypeInfo(types, serversocketSourceTableInfo.getFields()); + + String fields = StringUtils.join(serversocketSourceTableInfo.getFields(), ","); + + CustomerSocketTextStreamFunction customerSocketTextStreamFunction = new CustomerSocketTextStreamFunction(serversocketSourceTableInfo, typeInformation); + + DataStreamSource serversocketSource = env.addSource(customerSocketTextStreamFunction, tableName, typeInformation); + + return tableEnv.fromDataStream(serversocketSource, fields); + } +} diff --git a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceParser.java b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceParser.java new file mode 100644 index 000000000..b52e38e59 --- /dev/null +++ b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceParser.java @@ -0,0 +1,49 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flink.sql.source.serversocket.table;
+
+import com.dtstack.flink.sql.table.AbsSourceParser;
+import com.dtstack.flink.sql.table.TableInfo;
+import com.dtstack.flink.sql.util.MathUtil;
+
+import java.util.Map;
+
+/**
+ * Parser that fills a {@link ServersocketSourceTableInfo} from a table name, field list and property map.
+ * Date: 2018/12/18
+ * Company: www.dtstack.com
+ *
+ * @author maqi
+ */
+public class ServersocketSourceParser extends AbsSourceParser {
+    @Override
+    public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) {
+        final ServersocketSourceTableInfo socketTable = new ServersocketSourceTableInfo();
+        socketTable.setName(tableName);
+        parseFieldsInfo(fieldsInfo, socketTable);
+
+        // Each option is looked up in props under the lower-cased form of its key constant.
+        socketTable.setHostname(MathUtil.getString(props.get(ServersocketSourceTableInfo.HOSTNAME_KEY.toLowerCase())));
+        socketTable.setPort(MathUtil.getIntegerVal(props.get(ServersocketSourceTableInfo.PORT_KEY.toLowerCase())));
+        socketTable.setDelimiter(MathUtil.getString(props.get(ServersocketSourceTableInfo.DELIMITER_KEY.toLowerCase())));
+        socketTable.setMaxNumRetries(MathUtil.getLongVal(props.get(ServersocketSourceTableInfo.MAXNUMRETRIES_KEY.toLowerCase())));
+
+        socketTable.check(); // invoked for its validation; the boolean result is not used here
+        return socketTable;
+    }
+}
diff --git a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java
new file mode 100644
index 000000000..2ff9a6739
--- /dev/null
+++ 
b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java @@ -0,0 +1,101 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + *

+ * http://www.apache.org/licenses/LICENSE-2.0 + *

+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.dtstack.flink.sql.source.serversocket.table;
+
+import com.dtstack.flink.sql.table.SourceTableInfo;
+import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions;
+
+/**
+ * Table description of a "serversocket" source: host name, port, delimiter and retry limit.
+ * Date: 2018/12/18
+ * Company: www.dtstack.com
+ *
+ * @author maqi
+ */
+public class ServersocketSourceTableInfo extends SourceTableInfo {
+    /** Type name handed to setType by the constructor. */
+    private static final String CURR_TYPE = "serversocket";
+
+    /** Key constant for the host name option. */
+    public static final String HOSTNAME_KEY = "host";
+    /** Key constant for the port option. */
+    public static final String PORT_KEY = "port";
+    /** Key constant for the delimiter option. */
+    public static final String DELIMITER_KEY = "delimiter";
+    /** Key constant for the retry-limit option. */
+    public static final String MAXNUMRETRIES_KEY = "maxNumRetries";
+
+    private String hostname;
+    private int port;
+    private String delimiter;
+    private long maxNumRetries;
+
+    public ServersocketSourceTableInfo() {
+        super.setType(CURR_TYPE);
+    }
+
+    public String getHostname() {
+        return hostname;
+    }
+
+    public void setHostname(String hostname) {
+        this.hostname = hostname;
+    }
+
+    public int getPort() {
+        return port;
+    }
+
+    public void setPort(int port) {
+        this.port = port;
+    }
+
+    public String getDelimiter() {
+        return delimiter;
+    }
+
+    public void setDelimiter(String delimiter) {
+        this.delimiter = delimiter;
+    }
+
+    public long getMaxNumRetries() {
+        return maxNumRetries;
+    }
+
+    public void setMaxNumRetries(long maxNumRetries) {
+        this.maxNumRetries = maxNumRetries;
+    }
+
+    /**
+     * Validates the connection settings and throws on the first violation: host name
+     * and delimiter must be set, the port must lie in 1..65535 and maxNumRetries must
+     * be -1 or larger. port and maxNumRetries are primitives, so a null check on them
+     * could never fail and is not performed.
+     *
+     * @return always {@code false}; problems are reported by exception only
+     */
+    @Override
+    public boolean check() {
+        Preconditions.checkNotNull(hostname, "host name not null");
+        Preconditions.checkNotNull(delimiter, "delimiter name not null");
+        Preconditions.checkArgument(port > 0 && port < 65536, "port is out of range");
+        Preconditions.checkArgument(maxNumRetries >= -1, "maxNumRetries must be zero or larger (num retries), or -1 (infinite retries)");
+        // NOTE(review): false is returned even when every check passes; kept unchanged - confirm no caller relies on it.
+        return false;
+    }
+}
diff --git a/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverOutputFormat.java b/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverOutputFormat.java
deleted file mode 100644
index 21d1f3bd1..000000000
--- a/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverOutputFormat.java
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */ - -package com.dtstack.flink.sql.sink.sqlserver; - -import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; -import java.sql.ResultSet; -import java.sql.SQLException; -import java.util.ArrayList; -import java.util.List; -import java.util.Map; - -/** - * Reason: - * Date: 2018/11/30 - * Company: www.dtstack.com - * - * @author maqi - */ -public class SqlserverOutputFormat extends RetractJDBCOutputFormat { - - - @Override - public boolean isReplaceInsertQuery() throws SQLException { - fillRealIndexes(); - fillFullColumns(); - - if (!getRealIndexes().isEmpty()) { - for (List value : getRealIndexes().values()) { - for (String fieldName : getDbSink().getFieldNames()) { - if (value.contains(fieldName)) { - return true; - } - } - } - } - return false; - } - - /** - * get db all index - * - * @throws SQLException - */ - public void fillRealIndexes() throws SQLException { - Map> map = Maps.newHashMap(); - ResultSet rs = getDbConn().getMetaData().getIndexInfo(null, null, getTableName(), true, false); - - while (rs.next()) { - String indexName = rs.getString("INDEX_NAME"); - if (!map.containsKey(indexName)) { - map.put(indexName, new ArrayList<>()); - } - String column_name = rs.getString("COLUMN_NAME"); - if (StringUtils.isNotBlank(column_name)) { - column_name = column_name.toUpperCase(); - } - map.get(indexName).add(column_name); - } - - for (Map.Entry> entry : map.entrySet()) { - String k = entry.getKey(); - List v = entry.getValue(); - if (v != null && v.size() != 0 && v.get(0) != null) { - getRealIndexes().put(k, v); - } - } - } - - /** - * get db all column name - * - * @throws SQLException - */ - public void fillFullColumns() throws SQLException { - String schema = null; - ResultSet rs = getDbConn().getMetaData().getColumns(null, schema, getTableName(), null); - while (rs.next()) { - String columnName = rs.getString("COLUMN_NAME"); - 
if (StringUtils.isNotBlank(columnName)) {
-                getFullField().add(columnName.toUpperCase());
-            }
-        }
-    }
-
-
-}
diff --git a/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java b/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java
index 1e266e3bc..904cb0895 100644
--- a/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java
+++ b/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java
@@ -19,6 +19,7 @@
 
 import com.dtstack.flink.sql.sink.IStreamSinkGener;
 import com.dtstack.flink.sql.sink.rdb.RdbSink;
+import com.dtstack.flink.sql.sink.rdb.format.ExtendOutputFormat;
 import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat;
 import org.apache.commons.lang3.StringUtils;
 
@@ -41,7 +42,7 @@ public String getDriverName() {
 
     @Override
     public RetractJDBCOutputFormat getOutputFormat() {
-        return new SqlserverOutputFormat();
+        return new ExtendOutputFormat();
     }
 
     @Override
@@ -91,7 +92,7 @@ protected List keyColList(Map> updateKey) {
         for (Map.Entry> entry : updateKey.entrySet()) {
             List list = entry.getValue();
             for (String col : list) {
-                if (!keyCols.contains(col)) {
+                if (!containsIgnoreCase(keyCols,col)) {
                     keyCols.add(col);
                 }
             }
@@ -107,7 +108,7 @@ public String getUpdateSql(List column, List fullColumn, String
             if (keyCols == null || keyCols.size() == 0) {
                 continue;
             }
-            if (fullColumn == null || column.contains(col)) {
+            if (fullColumn == null || containsIgnoreCase(column,col)) {
                 list.add(prefixLeft + col + "=" + prefixRight + col);
             } else {
                 list.add(prefixLeft + col + "=null");
@@ -153,6 +154,23 @@ public String makeValues(List column) {
         return sb.toString();
     }
 
+    /**
+     * Tells whether {@code l} holds an element equal to {@code s} when letter case is ignored.
+     * Null elements are skipped instead of raising a NullPointerException.
+     *
+     * @param l column names to search
+     * @param s column name to look for
+     * @return {@code true} if some element matches {@code s} case-insensitively
+     */
+    public boolean containsIgnoreCase(List<String> l, String s) {
+        for (String candidate : l) {
+            if (candidate != null && candidate.equalsIgnoreCase(s)) {
+                return true;
+            }
+        }
+        return false;
+    }
+
     public String quoteColumn(String column) {
         return getStartQuote() + column + getEndQuote();
     }