diff --git a/README.md b/README.md index 3c4c527a8..6474dc052 100644 --- a/README.md +++ b/README.md @@ -8,27 +8,32 @@ > > * 扩展了输入和输出的性能指标到promethus ## 新特性: - 1.kafka源表支持not null语法,支持字符串类型的时间转换。 - 2.rdb维表与DB建立连接时,周期进行连接,防止连接断开。rdbsink写入时,对连接进行检查。 - 3.异步维表支持非等值连接,比如:<>,<,>。 + * 1.kafka源表支持not null语法,支持字符串类型的时间转换。 + * 2.rdb维表与DB建立连接时,周期进行连接,防止连接断开。rdbsink写入时,对连接进行检查。 + * 3.异步维表支持非等值连接,比如:<>,<,>。 + * 4.增加kafka数组解析 + * 5.增加kafka1.0以上版本的支持 + * 6.增加postgresql、kudu、clickhouse维表、结果表的支持 + * 7.支持插件的依赖方式,参考pluginLoadMode参数 + * 8.支持cep处理 + * 9.支持udaf + * 10.支持谓词下移 ## BUG修复: - 1.修复不能解析sql中orderby,union语法。 - 2.修复yarnPer模式提交失败的异常。 + * 1.修复不能解析sql中orderby,union语法。 + * 2.修复yarnPer模式提交失败的异常。 + * 3.一些bug的修复 # 已支持 - * 源表:kafka 0.9,1.x版本 - * 维表:mysql,SQlServer,oracle,hbase,mongo,redis,cassandra,serversocket - * 结果表:mysql,SQlServer,oracle,hbase,elasticsearch5.x,mongo,redis,cassandra,console + * 源表:kafka 0.9、0.10、0.11、1.x版本 + * 维表:mysql, SQlServer,oracle, hbase, mongo, redis, cassandra, serversocket, kudu, postgresql, clickhouse + * 结果表:mysql, SQlServer, oracle, hbase, elasticsearch5.x, mongo, redis, cassandra, console, kudu, postgresql, clickhouse # 后续开发计划 - * 增加SQL支持CEP * 维表快照 - * sql优化(谓词下移等) * kafka avro格式 * topN - ## 1 快速起步 ### 1.1 运行模式 @@ -40,7 +45,7 @@ ### 1.2 执行环境 * Java: JDK8及以上 -* Flink集群: 1.4,1.5(单机模式不需要安装Flink集群) +* Flink集群: 1.4,1.5,1.8(单机模式不需要安装Flink集群) * 操作系统:理论上不限 ### 1.3 打包 @@ -150,6 +155,11 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack * 必选:否 * 默认值:false +* **pluginLoadMode** + * 描述:per_job 模式下的插件包加载方式。classpath:从每台机器加载插件包,shipfile:将需要插件从提交的节点上传到hdfs,不需要每台安装插件 + * 必选:否 + * 默认值:classpath + * **yarnSessionConf** * 描述:yarn session 模式下指定的运行的一些参数,[可参考](https://ci.apache.org/projects/flink/flink-docs-release-1.8/ops/cli.html),目前只支持指定yid * 必选:否 @@ -163,16 +173,24 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack * [elasticsearch 结果表插件](docs/elasticsearchSink.md) * [hbase 结果表插件](docs/hbaseSink.md) * [mysql 
结果表插件](docs/mysqlSink.md) +* [oracle 结果表插件](docs/oracleSink.md) * [mongo 结果表插件](docs/mongoSink.md) * [redis 结果表插件](docs/redisSink.md) * [cassandra 结果表插件](docs/cassandraSink.md) +* [kudu 结果表插件](docs/kuduSink.md) +* [postgresql 结果表插件](docs/postgresqlSink.md) +* [clickhouse 结果表插件](docs/clickhouseSink.md) ### 2.3 维表插件 * [hbase 维表插件](docs/hbaseSide.md) * [mysql 维表插件](docs/mysqlSide.md) +* [oracle 维表插件](docs/oracleSide.md) * [mongo 维表插件](docs/mongoSide.md) * [redis 维表插件](docs/redisSide.md) * [cassandra 维表插件](docs/cassandraSide.md) +* [kudu 维表插件](docs/kuduSide.md) +* [postgresql 维表插件](docs/postgresqlSide.md) +* [clickhouse 维表插件](docs/clickhouseSide.md) ## 3 性能指标(新增) @@ -203,7 +221,7 @@ sh submit.sh -sql D:\sideSql.txt -name xctest -remoteSqlPluginPath /opt/dtstack ``` -CREATE (scala|table) FUNCTION CHARACTER_LENGTH WITH com.dtstack.Kun +CREATE (scala|table|aggregate) FUNCTION CHARACTER_LENGTH WITH com.dtstack.Kun; CREATE TABLE MyTable( diff --git a/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java b/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java index 3a7e56902..eecb1350e 100644 --- a/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java +++ b/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllReqRow.java @@ -36,8 +36,8 @@ import org.apache.calcite.sql.JoinType; import org.apache.commons.collections.CollectionUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; import 
org.apache.flink.util.Collector; diff --git a/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllSideInfo.java b/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllSideInfo.java index 99a286f9f..fa665f9a0 100644 --- a/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllSideInfo.java +++ b/cassandra/cassandra-side/cassandra-all-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAllSideInfo.java @@ -24,12 +24,10 @@ import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.cassandra.table.CassandraSideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.commons.collections.CollectionUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java b/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java index a4b04ed93..c2528d67b 100644 --- a/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java +++ b/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncReqRow.java @@ -45,7 +45,7 @@ import io.vertx.core.json.JsonArray; import org.apache.flink.api.java.typeutils.RowTypeInfo; import org.apache.flink.configuration.Configuration; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import 
org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; diff --git a/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncSideInfo.java b/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncSideInfo.java index 8ff801064..3557f0f73 100644 --- a/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncSideInfo.java +++ b/cassandra/cassandra-side/cassandra-async-side/src/main/java/com/dtstack/flink/sql/side/cassandra/CassandraAsyncSideInfo.java @@ -29,7 +29,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java b/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java index b1b36f7e8..205a0f769 100644 --- a/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java +++ b/cassandra/cassandra-side/cassandra-side-core/src/main/java/com/dtstack/flink/sql/side/cassandra/table/CassandraSideTableInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.side.cassandra.table; import com.dtstack.flink.sql.side.SideTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java 
b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java index 7d52b23bb..c6626c42a 100644 --- a/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java +++ b/cassandra/cassandra-sink/src/main/java/com/dtstack/flink/sql/sink/cassandra/table/CassandraTableInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.sink.cassandra.table; import com.dtstack.flink.sql.table.TargetTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/clickhouse/clickhouse-side/clickhouse-all-side/pom.xml b/clickhouse/clickhouse-side/clickhouse-all-side/pom.xml new file mode 100644 index 000000000..8e6c6f51a --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-all-side/pom.xml @@ -0,0 +1,92 @@ + + + + sql.side.clickhouse + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.side.all.clickhouse + clickhouse-all-side + + jar + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.clickhouse.core + ${sql.side.clickhouse.core.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/clickhouse/clickhouse-side/clickhouse-all-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAllReqRow.java b/clickhouse/clickhouse-side/clickhouse-all-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAllReqRow.java new file mode 100644 index 000000000..68c0c7984 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-all-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAllReqRow.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.side.clickhouse; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.all.RdbAllReqRow; +import com.dtstack.flink.sql.util.DtStringUtil; +import com.dtstack.flink.sql.util.JDBCUtils; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.util.List; +import java.util.Map; + +public class ClickhouseAllReqRow extends RdbAllReqRow { + + private static final Logger LOG = LoggerFactory.getLogger(ClickhouseAllReqRow.class); + + private static final String CLICKHOUSE_DRIVER = "ru.yandex.clickhouse.ClickHouseDriver"; + + public ClickhouseAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(new ClickhouseAllSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + } + + @Override + public Connection getConn(String dbURL, String userName, String passWord) { + try { + Connection connection ; + 
JDBCUtils.forName(CLICKHOUSE_DRIVER, getClass().getClassLoader()); + // ClickHouseProperties contains all properties + if (userName == null) { + connection = DriverManager.getConnection(dbURL); + } else { + connection = DriverManager.getConnection(dbURL, userName, passWord); + } + return connection; + } catch (Exception e) { + LOG.error("", e); + throw new RuntimeException("", e); + } + } + +} diff --git a/clickhouse/clickhouse-side/clickhouse-all-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAllSideInfo.java b/clickhouse/clickhouse-side/clickhouse-all-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAllSideInfo.java new file mode 100644 index 000000000..973c069b9 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-all-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAllSideInfo.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.side.clickhouse; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.all.RdbAllSideInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; + +import java.util.List; + + +public class ClickhouseAllSideInfo extends RdbAllSideInfo { + public ClickhouseAllSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } +} diff --git a/clickhouse/clickhouse-side/clickhouse-async-side/pom.xml b/clickhouse/clickhouse-side/clickhouse-async-side/pom.xml new file mode 100644 index 000000000..ab889c5a8 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-async-side/pom.xml @@ -0,0 +1,91 @@ + + + + sql.side.clickhouse + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.side.async.clickhouse + clickhouse-async-side + + jar + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.clickhouse.core + ${sql.side.clickhouse.core.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/clickhouse/clickhouse-side/clickhouse-async-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAsyncReqRow.java b/clickhouse/clickhouse-side/clickhouse-async-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAsyncReqRow.java new file mode 100644 index 000000000..305d65118 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-async-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAsyncReqRow.java @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.side.clickhouse; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.async.RdbAsyncReqRow; +import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; +import io.vertx.core.Vertx; +import io.vertx.core.VertxOptions; +import io.vertx.core.json.JsonObject; +import io.vertx.ext.jdbc.JDBCClient; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.configuration.Configuration; + +import java.util.List; + + +public class ClickhouseAsyncReqRow extends RdbAsyncReqRow { + private static final String CLICKHOUSE_DRIVER = "ru.yandex.clickhouse.ClickHouseDriver"; + + public ClickhouseAsyncReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(new ClickhouseAsyncSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + } + + @Override + public void open(Configuration parameters) throws Exception { + super.open(parameters); + JsonObject clickhouseClientConfig = new JsonObject(); + RdbSideTableInfo rdbSideTableInfo = (RdbSideTableInfo) sideInfo.getSideTableInfo(); + 
clickhouseClientConfig.put("url", rdbSideTableInfo.getUrl()) + .put("driver_class", CLICKHOUSE_DRIVER) + .put("max_pool_size", DEFAULT_MAX_DB_CONN_POOL_SIZE) + .put("user", rdbSideTableInfo.getUserName()) + .put("password", rdbSideTableInfo.getPassword()) + .put("provider_class", DT_PROVIDER_CLASS); + System.setProperty("vertx.disableFileCPResolving", "true"); + VertxOptions vo = new VertxOptions(); + vo.setEventLoopPoolSize(DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE); + vo.setWorkerPoolSize(DEFAULT_VERTX_WORKER_POOL_SIZE); + vo.setFileResolverCachingEnabled(false); + Vertx vertx = Vertx.vertx(vo); + setRdbSQLClient(JDBCClient.createNonShared(vertx, clickhouseClientConfig)); + } + +} diff --git a/clickhouse/clickhouse-side/clickhouse-async-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAsyncSideInfo.java b/clickhouse/clickhouse-side/clickhouse-async-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAsyncSideInfo.java new file mode 100644 index 000000000..254561de0 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-async-side/src/main/java/com/dtstack/flink/sql/side/clickhouse/ClickhouseAsyncSideInfo.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.side.clickhouse; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.async.RdbAsyncSideInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; + +import java.util.List; + + +public class ClickhouseAsyncSideInfo extends RdbAsyncSideInfo { + + public ClickhouseAsyncSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } +} diff --git a/clickhouse/clickhouse-side/clickhouse-side-core/pom.xml b/clickhouse/clickhouse-side/clickhouse-side-core/pom.xml new file mode 100644 index 000000000..29304d922 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-side-core/pom.xml @@ -0,0 +1,17 @@ + + + + sql.side.clickhouse + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.side.clickhouse.core + 1.0-SNAPSHOT + jar + clickhouse-side-core + + \ No newline at end of file diff --git a/clickhouse/clickhouse-side/clickhouse-side-core/src/main/java/com/dtstack/flink/sql/side/clickhouse/table/ClickhouseSideParser.java b/clickhouse/clickhouse-side/clickhouse-side-core/src/main/java/com/dtstack/flink/sql/side/clickhouse/table/ClickhouseSideParser.java new file mode 100644 index 000000000..7be387fd8 --- /dev/null +++ b/clickhouse/clickhouse-side/clickhouse-side-core/src/main/java/com/dtstack/flink/sql/side/clickhouse/table/ClickhouseSideParser.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.side.clickhouse.table; + +import com.dtstack.flink.sql.side.rdb.table.RdbSideParser; +import com.dtstack.flink.sql.table.TableInfo; +import ru.yandex.clickhouse.domain.ClickHouseDataType; + +import java.util.Map; + +/** + * Reason: + * Date: 2019/11/04 + * Company: www.dtstack.com + * + * @author maqi + */ + +public class ClickhouseSideParser extends RdbSideParser { + + private static final String CURR_TYPE = "clickhouse"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + TableInfo clickhouseTableInfo = super.getTableInfo(tableName, fieldsInfo, props); + clickhouseTableInfo.setType(CURR_TYPE); + return clickhouseTableInfo; + } + + @Override + public Class dbTypeConvertToJavaType(String fieldType) { + return ClickHouseDataType.fromTypeString(fieldType).getJavaClass(); + } + +} diff --git a/clickhouse/clickhouse-side/pom.xml b/clickhouse/clickhouse-side/pom.xml new file mode 100644 index 000000000..2ba6a14dc --- /dev/null +++ b/clickhouse/clickhouse-side/pom.xml @@ -0,0 +1,36 @@ + + + + sql.clickhouse + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.side.clickhouse + 1.0-SNAPSHOT + clickhouse-side + pom + + + clickhouse-side-core + clickhouse-async-side + clickhouse-all-side + + + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.rdb + ${rdb.side.version} + + + + \ No newline at end of file diff --git a/clickhouse/clickhouse-sink/pom.xml b/clickhouse/clickhouse-sink/pom.xml new file mode 100644 index 000000000..75aac514a --- /dev/null +++ 
b/clickhouse/clickhouse-sink/pom.xml @@ -0,0 +1,90 @@ + + + + sql.clickhouse + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.sink.clickhouse + clickhouse-sink + http://maven.apache.org + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.sink.rdb + ${sql.sink.rdb.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/clickhouse/clickhouse-sink/src/main/java/com/dtstack/flink/sql/sink/clickhouse/ClickhouseSink.java b/clickhouse/clickhouse-sink/src/main/java/com/dtstack/flink/sql/sink/clickhouse/ClickhouseSink.java new file mode 100644 index 000000000..66c1f26d5 --- /dev/null +++ b/clickhouse/clickhouse-sink/src/main/java/com/dtstack/flink/sql/sink/clickhouse/ClickhouseSink.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.sink.clickhouse; + + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.rdb.RdbSink; +import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; + +import java.util.List; +import java.util.Map; + + +public class ClickhouseSink extends RdbSink implements IStreamSinkGener { + + private static final String CLICKHOUSE_DRIVER = "ru.yandex.clickhouse.ClickHouseDriver"; + + public ClickhouseSink() { + } + + @Override + public RetractJDBCOutputFormat getOutputFormat() { + return new RetractJDBCOutputFormat(); + } + + @Override + public void buildSql(String scheam, String tableName, List fields) { + buildInsertSql(tableName, fields); + } + + @Override + public String buildUpdateSql(String schema, String tableName, List fieldNames, Map> realIndexes, List fullField) { + return null; + } + + private void buildInsertSql(String tableName, List fields) { + String sqlTmp = "insert into " + tableName + " (${fields}) values (${placeholder})"; + String fieldsStr = ""; + String placeholder = ""; + + for (String fieldName : fields) { + fieldsStr += ",`" + fieldName + "`"; + placeholder += ",?"; + } + + fieldsStr = fieldsStr.replaceFirst(",", ""); + placeholder = placeholder.replaceFirst(",", ""); + + sqlTmp = sqlTmp.replace("${fields}", fieldsStr).replace("${placeholder}", placeholder); + this.sql = sqlTmp; + System.out.println("---insert sql----"); + System.out.println(sql); + } + + + @Override + public String getDriverName() { + return CLICKHOUSE_DRIVER; + } + + +} diff --git a/clickhouse/clickhouse-sink/src/main/java/com/dtstack/flink/sql/sink/clickhouse/table/ClickhouseSinkParser.java b/clickhouse/clickhouse-sink/src/main/java/com/dtstack/flink/sql/sink/clickhouse/table/ClickhouseSinkParser.java new file mode 100644 index 000000000..8c3df93d7 --- /dev/null +++ b/clickhouse/clickhouse-sink/src/main/java/com/dtstack/flink/sql/sink/clickhouse/table/ClickhouseSinkParser.java @@ -0,0 +1,44 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.sink.clickhouse.table; + +import com.dtstack.flink.sql.sink.rdb.table.RdbSinkParser; +import com.dtstack.flink.sql.table.TableInfo; +import ru.yandex.clickhouse.domain.ClickHouseDataType; + +import java.util.Map; + + +public class ClickhouseSinkParser extends RdbSinkParser { + private static final String CURR_TYPE = "clickhouse"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + TableInfo clickhouseTableInfo = super.getTableInfo(tableName, fieldsInfo, props); + clickhouseTableInfo.setType(CURR_TYPE); + return clickhouseTableInfo; + } + + @Override + public Class dbTypeConvertToJavaType(String fieldType) { + return ClickHouseDataType.fromTypeString(fieldType).getJavaClass(); + } + +} \ No newline at end of file diff --git a/clickhouse/pom.xml b/clickhouse/pom.xml new file mode 100644 index 000000000..37589bb36 --- /dev/null +++ b/clickhouse/pom.xml @@ -0,0 +1,41 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.clickhouse + pom + + + clickhouse-side + clickhouse-sink + + + + + 1.0-SNAPSHOT + 0.1.55 + + + + + com.dtstack.flink + sql.core + ${sql.core.version} + provided 
+ + + + ru.yandex.clickhouse + clickhouse-jdbc + ${clickhouse.jdbc.version} + + + + \ No newline at end of file diff --git a/core/pom.xml b/core/pom.xml index 8d829a438..c357b4c99 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -89,12 +89,6 @@ ${flink.version} - - org.apache.flink - flink-shaded-hadoop2 - 1.7.2 - - org.apache.flink flink-yarn_2.11 diff --git a/core/src/main/java/com/dtstack/flink/sql/Main.java b/core/src/main/java/com/dtstack/flink/sql/Main.java index 5c354572b..a08df9b05 100644 --- a/core/src/main/java/com/dtstack/flink/sql/Main.java +++ b/core/src/main/java/com/dtstack/flink/sql/Main.java @@ -20,10 +20,12 @@ package com.dtstack.flink.sql; -import com.dtstack.flink.sql.classloader.DtClassLoader; +import com.dtstack.flink.sql.config.CalciteConfig; +import com.dtstack.flink.sql.classloader.ClassLoaderManager; import com.dtstack.flink.sql.constrant.ConfigConstrant; import com.dtstack.flink.sql.enums.ClusterMode; import com.dtstack.flink.sql.enums.ECacheType; +import com.dtstack.flink.sql.enums.EPluginLoadMode; import com.dtstack.flink.sql.environment.MyLocalStreamEnvironment; import com.dtstack.flink.sql.exec.FlinkSQLExec; import com.dtstack.flink.sql.option.OptionParser; @@ -40,13 +42,14 @@ import com.dtstack.flink.sql.sink.StreamSinkFactory; import com.dtstack.flink.sql.source.StreamSourceFactory; import com.dtstack.flink.sql.util.DtStringUtil; +import com.dtstack.flink.sql.util.PropertiesUtils; import com.dtstack.flink.sql.watermarker.WaterMarkerAssigner; import com.dtstack.flink.sql.util.FlinkUtil; import com.dtstack.flink.sql.util.PluginUtil; -import org.apache.calcite.config.Lex; import org.apache.calcite.sql.SqlInsert; import org.apache.calcite.sql.SqlNode; import org.apache.commons.io.Charsets; +import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.ExecutionConfig; import org.apache.flink.api.common.restartstrategy.RestartStrategies; import org.apache.flink.api.common.time.Time; @@ -55,11 +58,11 @@ import 
org.apache.flink.api.java.typeutils.RowTypeInfo; import org.apache.flink.client.program.ContextEnvironment; import org.apache.flink.configuration.Configuration; -import org.apache.flink.shaded.guava18.com.google.common.base.Strings; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; -import org.apache.flink.shaded.guava18.com.google.common.collect.Sets; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Sets; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.streaming.api.environment.StreamContextEnvironment; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; @@ -76,15 +79,12 @@ import java.net.URL; import java.net.URLClassLoader; import java.net.URLDecoder; -import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.concurrent.TimeUnit; import com.dtstack.flink.sql.option.Options; -import org.apache.calcite.sql.parser.SqlParser.Config; /** * Date: 2018/6/26 @@ -100,10 +100,6 @@ public class Main { private static final Logger LOG = LoggerFactory.getLogger(Main.class); - private static Config config = org.apache.calcite.sql.parser.SqlParser - .configBuilder() - .setLex(Lex.MYSQL) - .build(); public static void main(String[] args) throws Exception { @@ -114,8 +110,10 @@ public static void main(String[] args) throws Exception { String addJarListStr = options.getAddjar(); String localSqlPluginPath = options.getLocalSqlPluginPath(); String remoteSqlPluginPath = options.getRemoteSqlPluginPath(); + String pluginLoadMode = options.getPluginLoadMode(); String deployMode = 
options.getMode(); String confProp = options.getConfProp(); + sql = URLDecoder.decode(sql, Charsets.UTF_8.name()); SqlParser.setLocalSqlPluginRoot(localSqlPluginPath); @@ -125,10 +123,6 @@ public static void main(String[] args) throws Exception { addJarFileList = objMapper.readValue(addJarListStr, List.class); } - ClassLoader threadClassLoader = Thread.currentThread().getContextClassLoader(); - DtClassLoader parentClassloader = new DtClassLoader(new URL[]{}, threadClassLoader); - Thread.currentThread().setContextClassLoader(parentClassloader); - confProp = URLDecoder.decode(confProp, Charsets.UTF_8.toString()); Properties confProperties = PluginUtil.jsonStrToObject(confProp, Properties.class); StreamExecutionEnvironment env = getStreamExeEnv(confProperties, deployMode); @@ -147,24 +141,22 @@ public static void main(String[] args) throws Exception { Map registerTableCache = Maps.newHashMap(); //register udf - registerUDF(sqlTree, jarURList, parentClassloader, tableEnv); + registerUDF(sqlTree, jarURList, tableEnv); //register table schema - registerTable(sqlTree, env, tableEnv, localSqlPluginPath, remoteSqlPluginPath, sideTableMap, registerTableCache); + registerTable(sqlTree, env, tableEnv, localSqlPluginPath, remoteSqlPluginPath, pluginLoadMode, sideTableMap, registerTableCache); - sqlTranslation(options,tableEnv,sqlTree,sideTableMap,registerTableCache); + sqlTranslation(localSqlPluginPath, tableEnv,sqlTree,sideTableMap,registerTableCache); if(env instanceof MyLocalStreamEnvironment) { - List urlList = new ArrayList<>(); - urlList.addAll(Arrays.asList(parentClassloader.getURLs())); - ((MyLocalStreamEnvironment) env).setClasspaths(urlList); + ((MyLocalStreamEnvironment) env).setClasspaths(ClassLoaderManager.getClassPath()); } env.execute(name); } - private static void sqlTranslation(Options options,StreamTableEnvironment tableEnv,SqlTree sqlTree,Map sideTableMap,Map registerTableCache) throws Exception { + private static void sqlTranslation(String 
localSqlPluginPath, StreamTableEnvironment tableEnv,SqlTree sqlTree,Map sideTableMap,Map registerTableCache) throws Exception { SideSqlExec sideSqlExec = new SideSqlExec(); - sideSqlExec.setLocalSqlPluginPath(options.getLocalSqlPluginPath()); + sideSqlExec.setLocalSqlPluginPath(localSqlPluginPath); for (CreateTmpTableParser.SqlParserResult result : sqlTree.getTmpSqlList()) { sideSqlExec.registerTmpTable(result, sideTableMap, tableEnv, registerTableCache); } @@ -179,7 +171,7 @@ private static void sqlTranslation(Options options,StreamTableEnvironment tableE CreateTmpTableParser.SqlParserResult tmp = sqlTree.getTmpTableMap().get(tableName); String realSql = DtStringUtil.replaceIgnoreQuota(result.getExecSql(), "`", ""); - SqlNode sqlNode = org.apache.calcite.sql.parser.SqlParser.create(realSql,config).parseStmt(); + SqlNode sqlNode = org.apache.calcite.sql.parser.SqlParser.create(realSql, CalciteConfig.MYSQL_LEX_CONFIG).parseStmt(); String tmpSql = ((SqlInsert) sqlNode).getSource().toString(); tmp.setExecSql(tmpSql); sideSqlExec.registerTmpTable(tmp, sideTableMap, tableEnv, registerTableCache); @@ -223,26 +215,25 @@ private static void addEnvClassPath(StreamExecutionEnvironment env, Set cla } } - private static void registerUDF(SqlTree sqlTree, List jarURList, URLClassLoader parentClassloader, - StreamTableEnvironment tableEnv) - throws ClassNotFoundException, NoSuchMethodException, IllegalAccessException, InvocationTargetException { - List funcList = sqlTree.getFunctionList(); - if (funcList.isEmpty()) { - return; - } - //load jar - URLClassLoader classLoader = FlinkUtil.loadExtraJar(jarURList, parentClassloader); + private static void registerUDF(SqlTree sqlTree, List jarURList, StreamTableEnvironment tableEnv) + throws NoSuchMethodException, IllegalAccessException, InvocationTargetException { //register urf + // udf和tableEnv须由同一个类加载器加载 + ClassLoader levelClassLoader = tableEnv.getClass().getClassLoader(); + URLClassLoader classLoader = null; + List funcList = 
sqlTree.getFunctionList(); for (CreateFuncParser.SqlParserResult funcInfo : funcList) { - FlinkUtil.registerUDF(funcInfo.getType(), funcInfo.getClassName(), funcInfo.getName(), - tableEnv, classLoader); + //classloader + if (classLoader == null) { + classLoader = FlinkUtil.loadExtraJar(jarURList, (URLClassLoader)levelClassLoader); + } + FlinkUtil.registerUDF(funcInfo.getType(), funcInfo.getClassName(), funcInfo.getName(), tableEnv, classLoader); } } - private static void registerTable(SqlTree sqlTree, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv, - String localSqlPluginPath, String remoteSqlPluginPath, - Map sideTableMap, Map registerTableCache) throws Exception { + private static void registerTable(SqlTree sqlTree, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv, String localSqlPluginPath, + String remoteSqlPluginPath, String pluginLoadMode, Map sideTableMap, Map registerTableCache) throws Exception { Set classPathSet = Sets.newHashSet(); WaterMarkerAssigner waterMarkerAssigner = new WaterMarkerAssigner(); for (TableInfo tableInfo : sqlTree.getTableInfoMap().values()) { @@ -277,18 +268,18 @@ private static void registerTable(SqlTree sqlTree, StreamExecutionEnvironment en LOG.info("registe table {} success.", tableInfo.getName()); } registerTableCache.put(tableInfo.getName(), regTable); - classPathSet.add(PluginUtil.getRemoteJarFilePath(tableInfo.getType(), SourceTableInfo.SOURCE_SUFFIX, remoteSqlPluginPath, localSqlPluginPath)); + classPathSet.add(buildSourceAndSinkPathByLoadMode(tableInfo.getType(), SourceTableInfo.SOURCE_SUFFIX, localSqlPluginPath, remoteSqlPluginPath, pluginLoadMode)); } else if (tableInfo instanceof TargetTableInfo) { TableSink tableSink = StreamSinkFactory.getTableSink((TargetTableInfo) tableInfo, localSqlPluginPath); TypeInformation[] flinkTypes = FlinkUtil.transformTypes(tableInfo.getFieldClasses()); tableEnv.registerTableSink(tableInfo.getName(), tableInfo.getFields(), flinkTypes, tableSink); - 
classPathSet.add( PluginUtil.getRemoteJarFilePath(tableInfo.getType(), TargetTableInfo.TARGET_SUFFIX, remoteSqlPluginPath, localSqlPluginPath)); + classPathSet.add(buildSourceAndSinkPathByLoadMode(tableInfo.getType(), TargetTableInfo.TARGET_SUFFIX, localSqlPluginPath, remoteSqlPluginPath, pluginLoadMode)); } else if(tableInfo instanceof SideTableInfo){ String sideOperator = ECacheType.ALL.name().equals(((SideTableInfo) tableInfo).getCacheType()) ? "all" : "async"; sideTableMap.put(tableInfo.getName(), (SideTableInfo) tableInfo); - classPathSet.add(PluginUtil.getRemoteSideJarFilePath(tableInfo.getType(), sideOperator, SideTableInfo.TARGET_SUFFIX, remoteSqlPluginPath, localSqlPluginPath)); + classPathSet.add(buildSidePathByLoadMode(tableInfo.getType(), sideOperator, SideTableInfo.TARGET_SUFFIX, localSqlPluginPath, remoteSqlPluginPath, pluginLoadMode)); }else { throw new RuntimeException("not support table type:" + tableInfo.getType()); } @@ -304,7 +295,23 @@ private static void registerTable(SqlTree sqlTree, StreamExecutionEnvironment en } } + private static URL buildSourceAndSinkPathByLoadMode(String type, String suffix, String localSqlPluginPath, String remoteSqlPluginPath, String pluginLoadMode) throws Exception { + if (StringUtils.equalsIgnoreCase(pluginLoadMode, EPluginLoadMode.CLASSPATH.name())) { + return PluginUtil.getRemoteJarFilePath(type, suffix, remoteSqlPluginPath, localSqlPluginPath); + } + return PluginUtil.getLocalJarFilePath(type, suffix, localSqlPluginPath); + } + + private static URL buildSidePathByLoadMode(String type, String operator, String suffix, String localSqlPluginPath, String remoteSqlPluginPath, String pluginLoadMode) throws Exception { + if (StringUtils.equalsIgnoreCase(pluginLoadMode, EPluginLoadMode.CLASSPATH.name())) { + return PluginUtil.getRemoteSideJarFilePath(type, operator, suffix, remoteSqlPluginPath, localSqlPluginPath); + } + return PluginUtil.getLocalSideJarFilePath(type, operator, suffix, localSqlPluginPath); + } + private 
static StreamExecutionEnvironment getStreamExeEnv(Properties confProperties, String deployMode) throws Exception { + confProperties = PropertiesUtils.propertiesTrim(confProperties); + StreamExecutionEnvironment env = !ClusterMode.local.name().equals(deployMode) ? StreamExecutionEnvironment.getExecutionEnvironment() : new MyLocalStreamEnvironment(); @@ -340,4 +347,5 @@ private static StreamExecutionEnvironment getStreamExeEnv(Properties confPropert FlinkUtil.openCheckpoint(env, confProperties); return env; } + } diff --git a/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderManager.java b/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderManager.java new file mode 100644 index 000000000..6db1058e5 --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderManager.java @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.classloader; + +import com.dtstack.flink.sql.util.PluginUtil; +import org.apache.commons.lang3.StringUtils; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +/** + * company: www.dtstack.com + * author: toutian + * create: 2019/10/14 + */ +public class ClassLoaderManager { + + private static final Logger LOG = LoggerFactory.getLogger(ClassLoaderManager.class); + + private static Map pluginClassLoader = new ConcurrentHashMap<>(); + + public static R newInstance(String pluginJarPath, ClassLoaderSupplier supplier) throws Exception { + ClassLoader classLoader = retrieveClassLoad(pluginJarPath); + return ClassLoaderSupplierCallBack.callbackAndReset(supplier, classLoader); + } + + public static R newInstance(List jarUrls, ClassLoaderSupplier supplier) throws Exception { + ClassLoader classLoader = retrieveClassLoad(jarUrls); + return ClassLoaderSupplierCallBack.callbackAndReset(supplier, classLoader); + } + + private static DtClassLoader retrieveClassLoad(String pluginJarPath) { + return pluginClassLoader.computeIfAbsent(pluginJarPath, k -> { + try { + URL[] urls = PluginUtil.getPluginJarUrls(pluginJarPath); + ClassLoader parentClassLoader = Thread.currentThread().getContextClassLoader(); + DtClassLoader classLoader = new DtClassLoader(urls, parentClassLoader); + LOG.info("pluginJarPath:{} create ClassLoad successful...", pluginJarPath); + return classLoader; + } catch (Throwable e) { + LOG.error("retrieve ClassLoad happens error:{}", e); + throw new RuntimeException("retrieve ClassLoad happens error"); + } + }); + } + + private static DtClassLoader retrieveClassLoad(List jarUrls) { + jarUrls.sort(Comparator.comparing(URL::toString)); + String jarUrlkey = StringUtils.join(jarUrls, "_"); + return 
pluginClassLoader.computeIfAbsent(jarUrlkey, k -> { + try { + URL[] urls = jarUrls.toArray(new URL[jarUrls.size()]); + ClassLoader parentClassLoader = Thread.currentThread().getContextClassLoader(); + DtClassLoader classLoader = new DtClassLoader(urls, parentClassLoader); + LOG.info("jarUrl:{} create ClassLoad successful...", jarUrlkey); + return classLoader; + } catch (Throwable e) { + LOG.error("retrieve ClassLoad happens error:{}", e); + throw new RuntimeException("retrieve ClassLoad happens error"); + } + }); + } + + public static List getClassPath() { + List classPaths = new ArrayList<>(); + for (Map.Entry entry : pluginClassLoader.entrySet()) { + classPaths.addAll(Arrays.asList(entry.getValue().getURLs())); + } + return classPaths; + } +} diff --git a/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderSupplier.java b/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderSupplier.java new file mode 100644 index 000000000..859aa75f4 --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderSupplier.java @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.classloader; + +/** + * Represents a supplier of results. 
+ * + *

There is no requirement that a new or distinct result be returned each + * time the supplier is invoked. + * + *

This is a functional interface + * whose functional method is {@link #get()}. + * + * @param the type of results supplied by this supplier + * + * @since 1.8 + */ +@FunctionalInterface +public interface ClassLoaderSupplier { + + /** + * Gets a result. + * + * @return a result + */ + T get(ClassLoader cl) throws Exception; +} diff --git a/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderSupplierCallBack.java b/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderSupplierCallBack.java new file mode 100644 index 000000000..51d37ef5e --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/classloader/ClassLoaderSupplierCallBack.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.classloader; + +/** + * company: www.dtstack.com + * author: toutian + * create: 2019/10/14 + */ +public class ClassLoaderSupplierCallBack { + + public static R callbackAndReset(ClassLoaderSupplier supplier, ClassLoader toSetClassLoader) throws Exception { + ClassLoader oldClassLoader = Thread.currentThread().getContextClassLoader(); + Thread.currentThread().setContextClassLoader(toSetClassLoader); + try { + return supplier.get(toSetClassLoader); + } finally { + Thread.currentThread().setContextClassLoader(oldClassLoader); + } + } + + +} diff --git a/core/src/main/java/com/dtstack/flink/sql/config/CalciteConfig.java b/core/src/main/java/com/dtstack/flink/sql/config/CalciteConfig.java new file mode 100644 index 000000000..54ae66bbc --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/config/CalciteConfig.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.config; + +import org.apache.calcite.config.Lex; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.parser.SqlParser.Config; + +public class CalciteConfig { + + public static Config MYSQL_LEX_CONFIG = SqlParser + .configBuilder() + .setLex(Lex.MYSQL) + .build(); + + + +} diff --git a/core/src/main/java/com/dtstack/flink/sql/constrant/ConfigConstrant.java b/core/src/main/java/com/dtstack/flink/sql/constrant/ConfigConstrant.java index 6b6551f9a..76f5996c3 100644 --- a/core/src/main/java/com/dtstack/flink/sql/constrant/ConfigConstrant.java +++ b/core/src/main/java/com/dtstack/flink/sql/constrant/ConfigConstrant.java @@ -29,7 +29,9 @@ */ public class ConfigConstrant { - public static final String FLINK_CHECKPOINT_INTERVAL_KEY = "sql.checkpoint.interval"; + public static final String SQL_CHECKPOINT_INTERVAL_KEY = "sql.checkpoint.interval"; + // 兼容上层 + public static final String FLINK_CHECKPOINT_INTERVAL_KEY = "flink.checkpoint.interval"; public static final String FLINK_CHECKPOINT_MODE_KEY = "sql.checkpoint.mode"; @@ -37,7 +39,11 @@ public class ConfigConstrant { public static final String FLINK_MAXCONCURRENTCHECKPOINTS_KEY = "sql.max.concurrent.checkpoints"; - public static final String FLINK_CHECKPOINT_CLEANUPMODE_KEY = "sql.checkpoint.cleanup.mode"; + public static final String SQL_CHECKPOINT_CLEANUPMODE_KEY = "sql.checkpoint.cleanup.mode"; + + public static final String FLINK_CHECKPOINT_CLEANUPMODE_KEY = "flink.checkpoint.cleanup.mode"; + + public static final String FLINK_CHECKPOINT_DATAURI_KEY = "flinkCheckpointDataURI"; diff --git a/core/src/main/java/com/dtstack/flink/sql/enums/EPluginLoadMode.java b/core/src/main/java/com/dtstack/flink/sql/enums/EPluginLoadMode.java new file mode 100644 index 000000000..6cb027ac3 --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/enums/EPluginLoadMode.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more 
contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.enums; + +/** + * + * CLASSPATH: plugin jar depends on each machine node. + * SHIPFILE: plugin jar only depends on the client submitted by the task. + * + */ +public enum EPluginLoadMode { + + CLASSPATH(0), + SHIPFILE(1); + + private int type; + + EPluginLoadMode(int type){ + this.type = type; + } + + public int getType(){ + return this.type; + } +} diff --git a/core/src/main/java/com/dtstack/flink/sql/option/OptionParser.java b/core/src/main/java/com/dtstack/flink/sql/option/OptionParser.java index 10e34a5e6..07860b608 100644 --- a/core/src/main/java/com/dtstack/flink/sql/option/OptionParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/option/OptionParser.java @@ -18,7 +18,7 @@ package com.dtstack.flink.sql.option; -import avro.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import com.dtstack.flink.sql.util.PluginUtil; import org.apache.commons.cli.BasicParser; import org.apache.commons.cli.CommandLine; diff --git a/core/src/main/java/com/dtstack/flink/sql/option/Options.java b/core/src/main/java/com/dtstack/flink/sql/option/Options.java index eef54a617..a653aa42e 100644 --- a/core/src/main/java/com/dtstack/flink/sql/option/Options.java +++ 
b/core/src/main/java/com/dtstack/flink/sql/option/Options.java @@ -19,6 +19,7 @@ package com.dtstack.flink.sql.option; import com.dtstack.flink.sql.enums.ClusterMode; +import com.dtstack.flink.sql.enums.EPluginLoadMode; /** @@ -71,6 +72,9 @@ public class Options { @OptionRequired(description = "yarn session configuration,such as yid") private String yarnSessionConf = "{}"; + @OptionRequired(description = "plugin load mode, by classpath or shipfile") + private String pluginLoadMode = EPluginLoadMode.CLASSPATH.name(); + public String getMode() { return mode; } @@ -182,4 +186,12 @@ public String getYarnSessionConf() { public void setYarnSessionConf(String yarnSessionConf) { this.yarnSessionConf = yarnSessionConf; } + + public String getPluginLoadMode() { + return pluginLoadMode; + } + + public void setPluginLoadMode(String pluginLoadMode) { + this.pluginLoadMode = pluginLoadMode; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java index 793dd6baa..670d98a7e 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/CreateFuncParser.java @@ -32,7 +32,7 @@ public class CreateFuncParser implements IParser { - private static final String funcPatternStr = "(?i)\\s*create\\s+(scala|table)\\s+function\\s+(\\S+)\\s+WITH\\s+(\\S+)"; + private static final String funcPatternStr = "(?i)\\s*create\\s+(scala|table|aggregate)\\s+function\\s+(\\S+)\\s+WITH\\s+(\\S+)"; private static final Pattern funcPattern = Pattern.compile(funcPatternStr); diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java index 5e126e786..ae6e1f708 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTableParser.java @@ -21,7 +21,7 @@ 
package com.dtstack.flink.sql.parser; import com.dtstack.flink.sql.util.DtStringUtil; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import java.util.List; import java.util.Map; diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java index db18986b7..de7141eb5 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/CreateTmpTableParser.java @@ -25,8 +25,7 @@ import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; - +import com.google.common.collect.Lists; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java index ff2bb9e4b..a7c6db9eb 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/InsertSqlParser.java @@ -21,17 +21,11 @@ package com.dtstack.flink.sql.parser; import org.apache.calcite.config.Lex; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlInsert; -import org.apache.calcite.sql.SqlJoin; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlNode; -import org.apache.calcite.sql.SqlOrderBy; -import org.apache.calcite.sql.SqlSelect; +import org.apache.calcite.sql.*; import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import 
java.util.List; @@ -119,6 +113,10 @@ private static void parseNode(SqlNode sqlNode, SqlParseResult sqlParseResult){ sqlParseResult.addSourceTable(identifierNode.toString()); } break; + case MATCH_RECOGNIZE: + SqlMatchRecognize node = (SqlMatchRecognize) sqlNode; + sqlParseResult.addSourceTable(node.getTableRef().toString()); + break; case UNION: SqlNode unionLeft = ((SqlBasicCall)sqlNode).getOperands()[0]; SqlNode unionRight = ((SqlBasicCall)sqlNode).getOperands()[1]; diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/SqlParser.java b/core/src/main/java/com/dtstack/flink/sql/parser/SqlParser.java index e9fb68cfe..a76c1b31a 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/SqlParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/SqlParser.java @@ -25,8 +25,8 @@ import com.dtstack.flink.sql.table.TableInfoParser; import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.curator.org.apache.curator.shaded.com.google.common.collect.Lists; -import org.apache.flink.shaded.guava18.com.google.common.base.Strings; +import com.google.common.collect.Lists; +import com.google.common.base.Strings; import java.util.List; import java.util.Set; diff --git a/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java b/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java index 754de0819..1b64b7c68 100644 --- a/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java +++ b/core/src/main/java/com/dtstack/flink/sql/parser/SqlTree.java @@ -22,8 +22,8 @@ import com.dtstack.flink.sql.table.TableInfo; -import org.apache.flink.shaded.curator.org.apache.curator.shaded.com.google.common.collect.Maps; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Lists; import java.util.List; import java.util.Map; diff --git a/core/src/main/java/com/dtstack/flink/sql/side/FieldReplaceInfo.java 
b/core/src/main/java/com/dtstack/flink/sql/side/FieldReplaceInfo.java index bc716ddaa..37b23d046 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/FieldReplaceInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/FieldReplaceInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.side; -import org.apache.flink.calcite.shaded.com.google.common.collect.HashBasedTable; +import com.google.common.collect.HashBasedTable; /** * Reason: diff --git a/core/src/main/java/com/dtstack/flink/sql/side/JoinInfo.java b/core/src/main/java/com/dtstack/flink/sql/side/JoinInfo.java index 03dbde5a6..6fde02493 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/JoinInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/JoinInfo.java @@ -22,9 +22,10 @@ import org.apache.calcite.sql.JoinType; import org.apache.calcite.sql.SqlNode; -import org.apache.flink.calcite.shaded.com.google.common.base.Strings; +import com.google.common.base.Strings; import java.io.Serializable; +import java.util.Map; /** * Join信息 @@ -40,6 +41,8 @@ public class JoinInfo implements Serializable { //左表是否是维表 private boolean leftIsSideTable; + //左表是 转换后的中间表 + private boolean leftIsMidTable; //右表是否是维表 private boolean rightIsSideTable; @@ -63,6 +66,8 @@ public class JoinInfo implements Serializable { private SqlNode selectNode; private JoinType joinType; + // 左边是中间转换表,做表映射关系,给替换属性名称使用 + private Map leftTabMapping; public String getSideTableName(){ if(leftIsSideTable){ @@ -87,6 +92,22 @@ public String getNewTableName(){ return leftStr + "_" + rightTableName; } + public boolean isLeftIsMidTable() { + return leftIsMidTable; + } + + public void setLeftIsMidTable(boolean leftIsMidTable) { + this.leftIsMidTable = leftIsMidTable; + } + + public Map getLeftTabMapping() { + return leftTabMapping; + } + + public void setLeftTabMapping(Map leftTabMapping) { + this.leftTabMapping = leftTabMapping; + } + public String getNewTableAlias(){ return leftTableAlias + "_" + rightTableAlias; } diff --git 
a/core/src/main/java/com/dtstack/flink/sql/side/JoinScope.java b/core/src/main/java/com/dtstack/flink/sql/side/JoinScope.java index ba07e714a..c7a73e0d7 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/JoinScope.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/JoinScope.java @@ -23,8 +23,8 @@ import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import java.util.List; import java.util.Map; diff --git a/core/src/main/java/com/dtstack/flink/sql/side/ParserJoinField.java b/core/src/main/java/com/dtstack/flink/sql/side/ParserJoinField.java index df242a390..74d303c24 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/ParserJoinField.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/ParserJoinField.java @@ -27,7 +27,7 @@ import org.apache.calcite.sql.SqlSelect; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.Iterator; import java.util.List; @@ -41,8 +41,12 @@ public class ParserJoinField { + /** - * Need to parse the fields of information and where selectlist + * build row by field + * @param sqlNode select node + * @param scope join left and right table all info + * @param getAll true,get all fields from two tables; false, extract useful field from select node * @return */ public static List getRowTypeInfo(SqlNode sqlNode, JoinScope scope, boolean getAll){ diff --git a/core/src/main/java/com/dtstack/flink/sql/side/SideInfo.java b/core/src/main/java/com/dtstack/flink/sql/side/SideInfo.java index 97e5e555f..df41e1663 100644 --- 
a/core/src/main/java/com/dtstack/flink/sql/side/SideInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/SideInfo.java @@ -27,8 +27,8 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import java.io.Serializable; import java.util.List; diff --git a/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java b/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java index b0ccc5feb..c881d6344 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/SideSQLParser.java @@ -20,7 +20,8 @@ package com.dtstack.flink.sql.side; -import org.apache.calcite.config.Lex; +import com.dtstack.flink.sql.config.CalciteConfig; +import com.dtstack.flink.sql.util.ParseUtils; import org.apache.calcite.sql.JoinType; import org.apache.calcite.sql.SqlAsOperator; import org.apache.calcite.sql.SqlBasicCall; @@ -38,9 +39,17 @@ import org.apache.calcite.sql.parser.SqlParseException; import org.apache.calcite.sql.parser.SqlParser; import org.apache.calcite.sql.parser.SqlParserPos; -import org.apache.flink.calcite.shaded.com.google.common.base.Strings; -import org.apache.flink.calcite.shaded.com.google.common.collect.Queues; - +import org.apache.commons.collections.CollectionUtils; +import org.apache.flink.api.java.tuple.Tuple2; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import com.google.common.collect.Queues; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Map; import java.util.Queue; import java.util.Set; @@ -54,16 +63,20 @@ */ public class SideSQLParser { + 
private static final Logger LOG = LoggerFactory.getLogger(SideSQLParser.class); + + private final char SPLIT = '_'; + + private String tempSQL = "SELECT * FROM TMP"; public Queue getExeQueue(String exeSql, Set sideTableSet) throws SqlParseException { System.out.println("---exeSql---"); System.out.println(exeSql); + LOG.info("---exeSql---"); + LOG.info(exeSql); + Queue queueInfo = Queues.newLinkedBlockingQueue(); - SqlParser.Config config = SqlParser - .configBuilder() - .setLex(Lex.MYSQL) - .build(); - SqlParser sqlParser = SqlParser.create(exeSql,config); + SqlParser sqlParser = SqlParser.create(exeSql, CalciteConfig.MYSQL_LEX_CONFIG); SqlNode sqlNode = sqlParser.parseStmt(); parseSql(sqlNode, sideTableSet, queueInfo); queueInfo.offer(sqlNode); @@ -143,18 +156,47 @@ private Object parseSql(SqlNode sqlNode, Set sideTableSet, Queue return ""; } - private JoinInfo dealJoinNode(SqlJoin joinNode, Set sideTableSet, Queue queueInfo){ + private JoinInfo dealJoinNode(SqlJoin joinNode, Set sideTableSet, Queue queueInfo) { SqlNode leftNode = joinNode.getLeft(); SqlNode rightNode = joinNode.getRight(); JoinType joinType = joinNode.getJoinType(); String leftTbName = ""; String leftTbAlias = ""; + String rightTableName = ""; + String rightTableAlias = ""; + Map midTableMapping = null ; + boolean leftIsMidTable = false; + // 右节点已经被解析 + boolean rightIsParse = false; + Tuple2 rightTableNameAndAlias = null; + if(leftNode.getKind() == IDENTIFIER){ leftTbName = leftNode.toString(); }else if(leftNode.getKind() == JOIN){ - Object leftNodeJoinInfo = parseSql(leftNode, sideTableSet, queueInfo); - System.out.println(leftNodeJoinInfo); + JoinInfo leftNodeJoinInfo = (JoinInfo)parseSql(leftNode, sideTableSet, queueInfo);//解析多JOIN + + rightTableNameAndAlias = parseRightNode(rightNode, sideTableSet, queueInfo); + rightIsParse = true; + if (checkIsSideTable(rightTableNameAndAlias.f0, sideTableSet)) { + // select * from xxx + SqlNode sqlNode = buildSelectByLeftNode(leftNode); + // ( select * 
from xxx) as xxx_0 + SqlBasicCall newAsNode = buildAsNodeByJoinInfo(leftNodeJoinInfo, sqlNode); + leftNode = newAsNode; + joinNode.setLeft(leftNode); + + leftIsMidTable = true; + midTableMapping = saveTabMapping(leftNodeJoinInfo); + + AliasInfo aliasInfo = (AliasInfo) parseSql(newAsNode, sideTableSet, queueInfo); + leftTbName = aliasInfo.getName(); + leftTbAlias = aliasInfo.getAlias(); + } else { + leftTbName = leftNodeJoinInfo.getRightTableName(); + leftTbAlias = leftNodeJoinInfo.getRightTableAlias(); + } + }else if(leftNode.getKind() == AS){ AliasInfo aliasInfo = (AliasInfo) parseSql(leftNode, sideTableSet, queueInfo); leftTbName = aliasInfo.getName(); @@ -168,22 +210,23 @@ private JoinInfo dealJoinNode(SqlJoin joinNode, Set sideTableSet, Queue< throw new RuntimeException("side-table must be at the right of join operator"); } - String rightTableName = ""; - String rightTableAlias = ""; - - if(rightNode.getKind() == IDENTIFIER){ - rightTableName = rightNode.toString(); - }else{ - AliasInfo aliasInfo = (AliasInfo)parseSql(rightNode, sideTableSet, queueInfo); - rightTableName = aliasInfo.getName(); - rightTableAlias = aliasInfo.getAlias(); + if (!rightIsParse) { + rightTableNameAndAlias = parseRightNode(rightNode, sideTableSet, queueInfo); } + rightTableName = rightTableNameAndAlias.f0; + rightTableAlias = rightTableNameAndAlias.f1; boolean rightIsSide = checkIsSideTable(rightTableName, sideTableSet); if(joinType == JoinType.RIGHT){ throw new RuntimeException("side join not support join type of right[current support inner join and left join]"); } + if (leftIsMidTable) { + // 替换右边 on语句 中的字段别名 + SqlNode afterReplaceNameCondition = ParseUtils.replaceJoinConditionTabName(joinNode.getCondition(), midTableMapping); + joinNode.setOperand(5, afterReplaceNameCondition); + } + JoinInfo tableInfo = new JoinInfo(); tableInfo.setLeftTableName(leftTbName); tableInfo.setRightTableName(rightTableName); @@ -204,11 +247,105 @@ private JoinInfo dealJoinNode(SqlJoin joinNode, Set 
sideTableSet, Queue< tableInfo.setJoinType(joinType); tableInfo.setCondition(joinNode.getCondition()); + tableInfo.setLeftIsMidTable(leftIsMidTable); + tableInfo.setLeftTabMapping(midTableMapping); + return tableInfo; } + private Tuple2 parseRightNode(SqlNode sqlNode, Set sideTableSet, Queue queueInfo) { + Tuple2 tabName = new Tuple2<>("", ""); + if(sqlNode.getKind() == IDENTIFIER){ + tabName.f0 = sqlNode.toString(); + }else{ + AliasInfo aliasInfo = (AliasInfo)parseSql(sqlNode, sideTableSet, queueInfo); + tabName.f0 = aliasInfo.getName(); + tabName.f1 = aliasInfo.getAlias(); + } + return tabName; + } + + private Map saveTabMapping(JoinInfo leftNodeJoinInfo) { + Map midTableMapping = Maps.newHashMap();; + + String midTab = buidTableName(leftNodeJoinInfo.getLeftTableAlias(), SPLIT, leftNodeJoinInfo.getRightTableAlias()); + String finalMidTab = midTab + "_0"; + + if(leftNodeJoinInfo.isLeftIsMidTable()) { + midTableMapping.putAll(leftNodeJoinInfo.getLeftTabMapping()); + } + fillLeftAllTable(leftNodeJoinInfo, midTableMapping, finalMidTab); + return midTableMapping; + } + + private void fillLeftAllTable(JoinInfo leftNodeJoinInfo, Map midTableMapping, String finalMidTab) { + List tablesName = Lists.newArrayList(); + ParseUtils.parseLeftNodeTableName(leftNodeJoinInfo.getLeftNode(), tablesName); + + tablesName.forEach(tab ->{ + midTableMapping.put(tab, finalMidTab); + }); + midTableMapping.put(leftNodeJoinInfo.getRightTableAlias(), finalMidTab); + } + + + private SqlNode buildSelectByLeftNode(SqlNode leftNode) { + SqlParser sqlParser = SqlParser.create(tempSQL, CalciteConfig.MYSQL_LEX_CONFIG); + SqlNode sqlNode = null; + try { + sqlNode = sqlParser.parseStmt(); + }catch (Exception e) { + LOG.error("tmp sql parse error..", e); + } + + ((SqlSelect) sqlNode).setFrom(leftNode); + return sqlNode; + } + private void dealSelectResultWithJoinInfo(JoinInfo joinInfo, SqlSelect sqlNode, Queue queueInfo){ + // 中间虚拟表进行表名称替换 + if (joinInfo.isLeftIsMidTable()){ + SqlNode whereNode = 
sqlNode.getWhere(); + SqlNodeList sqlGroup = sqlNode.getGroup(); + SqlNodeList sqlSelectList = sqlNode.getSelectList(); + List newSelectNodeList = Lists.newArrayList(); + + for( int i=0; i sideTableList){ if(sideTableList.contains(tableName)){ return true; } - return false; } } diff --git a/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java b/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java index e83eda37a..bfccd02f7 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/SideSqlExec.java @@ -17,6 +17,7 @@ */ + package com.dtstack.flink.sql.side; import com.dtstack.flink.sql.enums.ECacheType; @@ -47,9 +48,9 @@ import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.HashBasedTable; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.HashBasedTable; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.streaming.api.datastream.DataStream; import org.apache.flink.table.api.Table; import org.apache.flink.table.api.java.StreamTableEnvironment; @@ -112,6 +113,8 @@ public void exec(String sql, Map sideTableMap, StreamTabl } if(pollSqlNode.getKind() == INSERT){ + System.out.println("----------real exec sql-----------" ); + System.out.println(pollSqlNode.toString()); FlinkSQLExec.sqlUpdate(tableEnv, pollSqlNode.toString()); if(LOG.isInfoEnabled()){ LOG.info("exec sql: " + pollSqlNode.toString()); @@ -360,7 +363,14 @@ private SqlNode replaceOrderByTableName(SqlNode orderNode, String tableAlias) { private SqlNode replaceNodeInfo(SqlNode groupNode, HashBasedTable mappingTable, String tableAlias){ if(groupNode.getKind() == IDENTIFIER){ 
SqlIdentifier sqlIdentifier = (SqlIdentifier) groupNode; + if(sqlIdentifier.names.size() == 1){ + return sqlIdentifier; + } String mappingFieldName = mappingTable.get(sqlIdentifier.getComponent(0).getSimple(), sqlIdentifier.getComponent(1).getSimple()); + + if(mappingFieldName == null){ + throw new RuntimeException("can't find mapping fieldName:" + sqlIdentifier.toString() ); + } sqlIdentifier = sqlIdentifier.setName(0, tableAlias); return sqlIdentifier.setName(1, mappingFieldName); }else if(groupNode instanceof SqlBasicCall){ @@ -510,6 +520,8 @@ private SqlNode replaceSelectFieldName(SqlNode selectNode, HashBasedTable sideParser = dtClassLoader.loadClass(className); - if(!AbsSideTableParser.class.isAssignableFrom(sideParser)){ - throw new RuntimeException("class " + sideParser.getName() + " not subClass of AbsSideTableParser"); - } - - return sideParser.asSubclass(AbsTableParser.class).newInstance(); + return ClassLoaderManager.newInstance(pluginJarPath, (cl) -> { + Class sideParser = cl.loadClass(className); + if (!AbsSideTableParser.class.isAssignableFrom(sideParser)) { + throw new RuntimeException("class " + sideParser.getName() + " not subClass of AbsSideTableParser"); + } + return sideParser.asSubclass(AbsTableParser.class).newInstance(); + }); } } diff --git a/core/src/main/java/com/dtstack/flink/sql/side/cache/LRUSideCache.java b/core/src/main/java/com/dtstack/flink/sql/side/cache/LRUSideCache.java index 475536a4c..700e13bb2 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/cache/LRUSideCache.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/cache/LRUSideCache.java @@ -21,8 +21,8 @@ package com.dtstack.flink.sql.side.cache; import com.dtstack.flink.sql.side.SideTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.cache.Cache; -import org.apache.flink.calcite.shaded.com.google.common.cache.CacheBuilder; +import com.google.common.cache.Cache; +import com.google.common.cache.CacheBuilder; import java.util.concurrent.TimeUnit; 
diff --git a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java index 688a19c35..290804200 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideAsyncOperator.java @@ -19,7 +19,7 @@ package com.dtstack.flink.sql.side.operator; -import com.dtstack.flink.sql.classloader.DtClassLoader; +import com.dtstack.flink.sql.classloader.ClassLoaderManager; import com.dtstack.flink.sql.side.AsyncReqRow; import com.dtstack.flink.sql.side.FieldInfo; import com.dtstack.flink.sql.side.JoinInfo; @@ -51,14 +51,13 @@ public class SideAsyncOperator { private static AsyncReqRow loadAsyncReq(String sideType, String sqlRootDir, RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) throws Exception { - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); String pathOfType = String.format(PATH_FORMAT, sideType); String pluginJarPath = PluginUtil.getJarFileDirPath(pathOfType, sqlRootDir); - DtClassLoader dtClassLoader = (DtClassLoader) classLoader; - PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); String className = PluginUtil.getSqlSideClassName(sideType, "side", OPERATOR_TYPE); - return dtClassLoader.loadClass(className).asSubclass(AsyncReqRow.class) - .getConstructor(RowTypeInfo.class, JoinInfo.class, List.class, SideTableInfo.class).newInstance(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + return ClassLoaderManager.newInstance(pluginJarPath, (cl) -> + cl.loadClass(className).asSubclass(AsyncReqRow.class) + .getConstructor(RowTypeInfo.class, JoinInfo.class, List.class, SideTableInfo.class) + .newInstance(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); } public static DataStream getSideJoinDataStream(DataStream inputStream, String sideType, String sqlRootDir, RowTypeInfo rowTypeInfo, JoinInfo 
joinInfo, diff --git a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java index 725798848..5aa810b0f 100644 --- a/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java +++ b/core/src/main/java/com/dtstack/flink/sql/side/operator/SideWithAllCacheOperator.java @@ -19,7 +19,7 @@ package com.dtstack.flink.sql.side.operator; -import com.dtstack.flink.sql.classloader.DtClassLoader; +import com.dtstack.flink.sql.classloader.ClassLoaderManager; import com.dtstack.flink.sql.side.AllReqRow; import com.dtstack.flink.sql.side.FieldInfo; import com.dtstack.flink.sql.side.JoinInfo; @@ -28,7 +28,6 @@ import org.apache.flink.api.java.typeutils.RowTypeInfo; import org.apache.flink.streaming.api.datastream.DataStream; -import java.net.MalformedURLException; import java.util.List; /** @@ -49,18 +48,13 @@ private static AllReqRow loadFlatMap(String sideType, String sqlRootDir, RowType JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) throws Exception { - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); String pathOfType = String.format(PATH_FORMAT, sideType); String pluginJarPath = PluginUtil.getJarFileDirPath(pathOfType, sqlRootDir); - - DtClassLoader dtClassLoader = (DtClassLoader) classLoader; - PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); String className = PluginUtil.getSqlSideClassName(sideType, "side", OPERATOR_TYPE); - return dtClassLoader.loadClass(className).asSubclass(AllReqRow.class).getConstructor(RowTypeInfo.class, JoinInfo.class, List.class, SideTableInfo.class) - .newInstance(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); - - + return ClassLoaderManager.newInstance(pluginJarPath, (cl) -> cl.loadClass(className).asSubclass(AllReqRow.class) + .getConstructor(RowTypeInfo.class, JoinInfo.class, List.class, SideTableInfo.class) + .newInstance(rowTypeInfo, 
joinInfo, outFieldInfoList, sideTableInfo)); } public static DataStream getSideJoinDataStream(DataStream inputStream, String sideType, String sqlRootDir, RowTypeInfo rowTypeInfo, JoinInfo joinInfo, diff --git a/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java b/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java index ebda80c8a..53460081d 100644 --- a/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java +++ b/core/src/main/java/com/dtstack/flink/sql/sink/StreamSinkFactory.java @@ -20,12 +20,11 @@ package com.dtstack.flink.sql.sink; -import com.dtstack.flink.sql.classloader.DtClassLoader; +import com.dtstack.flink.sql.classloader.ClassLoaderManager; import com.dtstack.flink.sql.table.AbsTableParser; import com.dtstack.flink.sql.table.TargetTableInfo; import com.dtstack.flink.sql.util.DtStringUtil; import com.dtstack.flink.sql.util.PluginUtil; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; import org.apache.flink.table.sinks.TableSink; /** @@ -42,51 +41,33 @@ public class StreamSinkFactory { private static final String DIR_NAME_FORMAT = "%ssink"; public static AbsTableParser getSqlParser(String pluginType, String sqlRootDir) throws Exception { - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - - if(!(classLoader instanceof DtClassLoader)){ - throw new RuntimeException("it's not a correct classLoader instance, it's type must be DtClassLoader!"); - } - - DtClassLoader dtClassLoader = (DtClassLoader) classLoader; - String pluginJarPath = PluginUtil.getJarFileDirPath(String.format(DIR_NAME_FORMAT, pluginType), sqlRootDir); - PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); String typeNoVersion = DtStringUtil.getPluginTypeWithoutVersion(pluginType); String className = PluginUtil.getSqlParserClassName(typeNoVersion, CURR_TYPE); - Class targetParser = dtClassLoader.loadClass(className); - if(!AbsTableParser.class.isAssignableFrom(targetParser)){ - throw new 
RuntimeException("class " + targetParser.getName() + " not subClass of AbsTableParser"); - } - - return targetParser.asSubclass(AbsTableParser.class).newInstance(); + return ClassLoaderManager.newInstance(pluginJarPath, (cl) -> { + Class targetParser = cl.loadClass(className); + if(!AbsTableParser.class.isAssignableFrom(targetParser)){ + throw new RuntimeException("class " + targetParser.getName() + " not subClass of AbsTableParser"); + } + return targetParser.asSubclass(AbsTableParser.class).newInstance(); + }); } public static TableSink getTableSink(TargetTableInfo targetTableInfo, String localSqlRootDir) throws Exception { - - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - if(!(classLoader instanceof DtClassLoader)){ - throw new RuntimeException("it's not a correct classLoader instance, it's type must be DtClassLoader!"); - } - - DtClassLoader dtClassLoader = (DtClassLoader) classLoader; - String pluginType = targetTableInfo.getType(); String pluginJarDirPath = PluginUtil.getJarFileDirPath(String.format(DIR_NAME_FORMAT, pluginType), localSqlRootDir); - - PluginUtil.addPluginJar(pluginJarDirPath, dtClassLoader); - String typeNoVersion = DtStringUtil.getPluginTypeWithoutVersion(pluginType); String className = PluginUtil.getGenerClassName(typeNoVersion, CURR_TYPE); - Class sinkClass = dtClassLoader.loadClass(className); - - if(!IStreamSinkGener.class.isAssignableFrom(sinkClass)){ - throw new RuntimeException("class " + sinkClass + " not subClass of IStreamSinkGener"); - } - IStreamSinkGener streamSinkGener = sinkClass.asSubclass(IStreamSinkGener.class).newInstance(); - Object result = streamSinkGener.genStreamSink(targetTableInfo); - return (TableSink) result; + return ClassLoaderManager.newInstance(pluginJarDirPath, (cl) -> { + Class sinkClass = cl.loadClass(className); + if(!IStreamSinkGener.class.isAssignableFrom(sinkClass)){ + throw new RuntimeException("class " + sinkClass + " not subClass of IStreamSinkGener"); + } + 
IStreamSinkGener streamSinkGener = sinkClass.asSubclass(IStreamSinkGener.class).newInstance(); + Object result = streamSinkGener.genStreamSink(targetTableInfo); + return (TableSink) result; + }); } } diff --git a/core/src/main/java/com/dtstack/flink/sql/source/StreamSourceFactory.java b/core/src/main/java/com/dtstack/flink/sql/source/StreamSourceFactory.java index b8dfe66e2..1057fb0ed 100644 --- a/core/src/main/java/com/dtstack/flink/sql/source/StreamSourceFactory.java +++ b/core/src/main/java/com/dtstack/flink/sql/source/StreamSourceFactory.java @@ -21,12 +21,11 @@ package com.dtstack.flink.sql.source; -import com.dtstack.flink.sql.classloader.DtClassLoader; +import com.dtstack.flink.sql.classloader.ClassLoaderManager; import com.dtstack.flink.sql.table.AbsSourceParser; import com.dtstack.flink.sql.table.SourceTableInfo; import com.dtstack.flink.sql.util.DtStringUtil; import com.dtstack.flink.sql.util.PluginUtil; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.table.api.Table; import org.apache.flink.table.api.java.StreamTableEnvironment; @@ -46,21 +45,16 @@ public class StreamSourceFactory { public static AbsSourceParser getSqlParser(String pluginType, String sqlRootDir) throws Exception { - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - String pluginJarPath = PluginUtil.getJarFileDirPath(String.format(DIR_NAME_FORMAT, pluginType), sqlRootDir); - - DtClassLoader dtClassLoader = (DtClassLoader) classLoader; - PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); - String typeNoVersion = DtStringUtil.getPluginTypeWithoutVersion(pluginType); String className = PluginUtil.getSqlParserClassName(typeNoVersion, CURR_TYPE); - Class sourceParser = dtClassLoader.loadClass(className); - if(!AbsSourceParser.class.isAssignableFrom(sourceParser)){ - throw new RuntimeException("class " + sourceParser.getName() + " not 
subClass of AbsSourceParser"); - } - - return sourceParser.asSubclass(AbsSourceParser.class).newInstance(); + return ClassLoaderManager.newInstance(pluginJarPath, (cl) -> { + Class sourceParser = cl.loadClass(className); + if(!AbsSourceParser.class.isAssignableFrom(sourceParser)){ + throw new RuntimeException("class " + sourceParser.getName() + " not subClass of AbsSourceParser"); + } + return sourceParser.asSubclass(AbsSourceParser.class).newInstance(); + }); } /** @@ -73,21 +67,17 @@ public static Table getStreamSource(SourceTableInfo sourceTableInfo, StreamExecu String sourceTypeStr = sourceTableInfo.getType(); String typeNoVersion = DtStringUtil.getPluginTypeWithoutVersion(sourceTypeStr); - ClassLoader classLoader = Thread.currentThread().getContextClassLoader(); - String pluginJarPath = PluginUtil.getJarFileDirPath(String.format(DIR_NAME_FORMAT, sourceTypeStr), sqlRootDir); String className = PluginUtil.getGenerClassName(typeNoVersion, CURR_TYPE); - DtClassLoader dtClassLoader = (DtClassLoader) classLoader; - PluginUtil.addPluginJar(pluginJarPath, dtClassLoader); - Class sourceClass = dtClassLoader.loadClass(className); - - if(!IStreamSourceGener.class.isAssignableFrom(sourceClass)){ - throw new RuntimeException("class " + sourceClass.getName() + " not subClass of IStreamSourceGener"); - } + return ClassLoaderManager.newInstance(pluginJarPath, (cl) -> { + Class sourceClass = cl.loadClass(className); + if(!IStreamSourceGener.class.isAssignableFrom(sourceClass)){ + throw new RuntimeException("class " + sourceClass.getName() + " not subClass of IStreamSourceGener"); + } - IStreamSourceGener sourceGener = sourceClass.asSubclass(IStreamSourceGener.class).newInstance(); - Object object = sourceGener.genStreamSource(sourceTableInfo, env, tableEnv); - return (Table) object; + IStreamSourceGener sourceGener = sourceClass.asSubclass(IStreamSourceGener.class).newInstance(); + return (Table) sourceGener.genStreamSource(sourceTableInfo, env, tableEnv); + }); } } diff --git 
a/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java b/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java index ae8135256..3c4199c8c 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/AbsSideTableParser.java @@ -23,8 +23,6 @@ import com.dtstack.flink.sql.enums.ECacheType; import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.util.MathUtil; -import org.apache.commons.lang3.StringUtils; - import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; diff --git a/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java b/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java index fc29932bf..aa0639c8a 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/AbsTableParser.java @@ -22,9 +22,8 @@ import com.dtstack.flink.sql.util.ClassUtil; import com.dtstack.flink.sql.util.DtStringUtil; -import org.apache.commons.lang3.StringUtils; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.shaded.curator.org.apache.curator.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import java.util.List; import java.util.Map; @@ -99,7 +98,7 @@ public void parseFieldsInfo(String fieldsInfo, TableInfo tableInfo){ System.arraycopy(filedInfoArr, 0, filedNameArr, 0, filedInfoArr.length - 1); String fieldName = String.join(" ", filedNameArr); String fieldType = filedInfoArr[filedInfoArr.length - 1 ].trim(); - Class fieldClass = ClassUtil.stringConvertClass(fieldType); + Class fieldClass = dbTypeConvertToJavaType(fieldType); tableInfo.addPhysicalMappings(filedInfoArr[0],filedInfoArr[0]); tableInfo.addField(fieldName); @@ -117,4 +116,9 @@ public static void dealPrimaryKey(Matcher matcher, TableInfo tableInfo){ 
List primaryKes = Lists.newArrayList(splitArry); tableInfo.setPrimaryKeys(primaryKes); } + + public Class dbTypeConvertToJavaType(String fieldType) { + return ClassUtil.stringConvertClass(fieldType); + } + } diff --git a/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java b/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java index d809dc3b5..9a41fa0a1 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/SourceTableInfo.java @@ -20,9 +20,9 @@ package com.dtstack.flink.sql.table; -import org.apache.flink.calcite.shaded.com.google.common.base.Strings; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.base.Strings; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.util.StringUtils; import java.util.ArrayList; diff --git a/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java b/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java index ebda7d120..a2454b893 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/TableInfo.java @@ -20,8 +20,8 @@ package com.dtstack.flink.sql.table; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import java.io.Serializable; import java.util.List; diff --git a/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java b/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java index 4c2f67f68..ae98d90ae 100644 --- a/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java +++ b/core/src/main/java/com/dtstack/flink/sql/table/TableInfoParser.java @@ -27,8 +27,8 @@ 
import com.dtstack.flink.sql.sink.StreamSinkFactory; import com.dtstack.flink.sql.source.StreamSourceFactory; import com.dtstack.flink.sql.util.MathUtil; -import org.apache.flink.calcite.shaded.com.google.common.base.Strings; -import org.apache.flink.shaded.curator.org.apache.curator.shaded.com.google.common.collect.Maps; +import com.google.common.base.Strings; +import com.google.common.collect.Maps; import java.util.Map; import java.util.regex.Matcher; diff --git a/core/src/main/java/com/dtstack/flink/sql/udf/TimestampUdf.java b/core/src/main/java/com/dtstack/flink/sql/udf/TimestampUdf.java deleted file mode 100644 index 9f605dde3..000000000 --- a/core/src/main/java/com/dtstack/flink/sql/udf/TimestampUdf.java +++ /dev/null @@ -1,24 +0,0 @@ -package com.dtstack.flink.sql.udf; - -import org.apache.flink.table.functions.FunctionContext; -import org.apache.flink.table.functions.ScalarFunction; - -import java.sql.Timestamp; - -public class TimestampUdf extends ScalarFunction { - @Override - public void open(FunctionContext context) { - } - public static Timestamp eval(String timestamp) { - if (timestamp.length() == 13){ - return new Timestamp(Long.parseLong(timestamp)); - }else if (timestamp.length() == 10){ - return new Timestamp(Long.parseLong(timestamp)*1000); - } else{ - return Timestamp.valueOf(timestamp); - } - } - @Override - public void close() { - } -} diff --git a/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java b/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java index e07f79a4d..862b0700e 100644 --- a/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java +++ b/core/src/main/java/com/dtstack/flink/sql/util/DtStringUtil.java @@ -22,9 +22,9 @@ import com.dtstack.flink.sql.enums.ColumnType; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.calcite.shaded.com.google.common.base.Strings; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; -import 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Strings; +import com.google.common.collect.Maps; +import com.fasterxml.jackson.databind.ObjectMapper; import java.sql.Timestamp; import java.math.BigDecimal; import java.util.ArrayList; @@ -59,33 +59,40 @@ public static List splitIgnoreQuota(String str, char delimiter){ boolean inSingleQuotes = false; int bracketLeftNum = 0; StringBuilder b = new StringBuilder(); - for (char c : str.toCharArray()) { - if(c == delimiter){ + char[] chars = str.toCharArray(); + int idx = 0; + for (char c : chars) { + char flag = 0; + if (idx > 0) { + flag = chars[idx - 1]; + } + if (c == delimiter) { if (inQuotes) { b.append(c); - } else if(inSingleQuotes){ + } else if (inSingleQuotes) { b.append(c); - } else if(bracketLeftNum > 0){ + } else if (bracketLeftNum > 0) { b.append(c); - }else { + } else { tokensList.add(b.toString()); b = new StringBuilder(); } - }else if(c == '\"'){ + } else if (c == '\"' && '\\' != flag && !inSingleQuotes) { inQuotes = !inQuotes; b.append(c); - }else if(c == '\''){ + } else if (c == '\'' && '\\' != flag && !inQuotes) { inSingleQuotes = !inSingleQuotes; b.append(c); - }else if(c == '('){ + } else if (c == '(' && !inSingleQuotes && !inQuotes) { bracketLeftNum++; b.append(c); - }else if(c == ')'){ + } else if (c == ')' && !inSingleQuotes && !inQuotes) { bracketLeftNum--; b.append(c); - }else{ + } else { b.append(c); } + idx++; } tokensList.add(b.toString()); @@ -248,7 +255,30 @@ public static Object parse(String str,Class clazz){ return object; } + public static String firstUpperCase(String str) { return str.substring(0, 1).toUpperCase() + str.substring(1); } + + public static String getTableFullPath(String schema, String tableName) { + if (StringUtils.isEmpty(schema)){ + return addQuoteForStr(tableName); + } + String schemaAndTabName = addQuoteForStr(schema) + "." 
+ addQuoteForStr(tableName); + return schemaAndTabName; + } + + + + public static String addQuoteForStr(String column) { + return getStartQuote() + column + getEndQuote(); + } + + public static String getStartQuote() { + return "\""; + } + + public static String getEndQuote() { + return "\""; + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/util/FlinkUtil.java b/core/src/main/java/com/dtstack/flink/sql/util/FlinkUtil.java index 187aaa219..5811105e4 100644 --- a/core/src/main/java/com/dtstack/flink/sql/util/FlinkUtil.java +++ b/core/src/main/java/com/dtstack/flink/sql/util/FlinkUtil.java @@ -16,11 +16,12 @@ * limitations under the License. */ - + package com.dtstack.flink.sql.util; +import com.dtstack.flink.sql.classloader.ClassLoaderManager; import com.dtstack.flink.sql.constrant.ConfigConstrant; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; @@ -34,6 +35,8 @@ import org.apache.flink.table.api.java.StreamTableEnvironment; import org.apache.flink.table.functions.ScalarFunction; import org.apache.flink.table.functions.TableFunction; +import org.apache.flink.table.functions.AggregateFunction; + import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -68,12 +71,14 @@ public static void openCheckpoint(StreamExecutionEnvironment env, Properties pro } //设置了时间间隔才表明开启了checkpoint - if(properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_INTERVAL_KEY) == null){ + if(properties.getProperty(ConfigConstrant.SQL_CHECKPOINT_INTERVAL_KEY) == null && properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_INTERVAL_KEY) == null){ return; }else{ - Long interval = Long.valueOf(properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_INTERVAL_KEY)); + Long sql_interval = Long.valueOf(properties.getProperty(ConfigConstrant.SQL_CHECKPOINT_INTERVAL_KEY,"0")); + Long flink_interval = Long.valueOf(properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_INTERVAL_KEY, "0")); + long checkpointInterval = 
Math.max(sql_interval, flink_interval); //start checkpoint every ${interval} - env.enableCheckpointing(interval); + env.enableCheckpointing(checkpointInterval); } String checkMode = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_MODE_KEY); @@ -101,7 +106,14 @@ public static void openCheckpoint(StreamExecutionEnvironment env, Properties pro env.getCheckpointConfig().setMaxConcurrentCheckpoints(maxConcurrCheckpoints); } - String cleanupModeStr = properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_CLEANUPMODE_KEY); + Boolean sqlCleanMode = MathUtil.getBoolean(properties.getProperty(ConfigConstrant.SQL_CHECKPOINT_CLEANUPMODE_KEY), false); + Boolean flinkCleanMode = MathUtil.getBoolean(properties.getProperty(ConfigConstrant.FLINK_CHECKPOINT_CLEANUPMODE_KEY), false); + + String cleanupModeStr = "false"; + if (sqlCleanMode || flinkCleanMode ){ + cleanupModeStr = "true"; + } + if ("true".equalsIgnoreCase(cleanupModeStr)){ env.getCheckpointConfig().enableExternalizedCheckpoints( CheckpointConfig.ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION); @@ -147,20 +159,19 @@ public static void setStreamTimeCharacteristic(StreamExecutionEnvironment env, P } - /** - * FIXME 暂时不支持 UDF 实现类--有参构造方法 - * TABLE|SCALA + * TABLE|SCALA|AGGREGATE * 注册UDF到table env */ - public static void registerUDF(String type, String classPath, String funcName, TableEnvironment tableEnv, - ClassLoader classLoader){ + public static void registerUDF(String type, String classPath, String funcName, TableEnvironment tableEnv, ClassLoader classLoader){ if("SCALA".equalsIgnoreCase(type)){ registerScalaUDF(classPath, funcName, tableEnv, classLoader); }else if("TABLE".equalsIgnoreCase(type)){ registerTableUDF(classPath, funcName, tableEnv, classLoader); + }else if("AGGREGATE".equalsIgnoreCase(type)){ + registerAggregateUDF(classPath, funcName, tableEnv, classLoader); }else{ - throw new RuntimeException("not support of UDF which is not in (TABLE, SCALA)"); + throw new RuntimeException("not support of 
UDF which is not in (TABLE, SCALA, AGGREGATE)"); } } @@ -171,8 +182,7 @@ public static void registerUDF(String type, String classPath, String funcName, T * @param funcName * @param tableEnv */ - public static void registerScalaUDF(String classPath, String funcName, TableEnvironment tableEnv, - ClassLoader classLoader){ + public static void registerScalaUDF(String classPath, String funcName, TableEnvironment tableEnv, ClassLoader classLoader){ try{ ScalarFunction udfFunc = Class.forName(classPath, false, classLoader) .asSubclass(ScalarFunction.class).newInstance(); @@ -186,17 +196,15 @@ public static void registerScalaUDF(String classPath, String funcName, TableEnvi /** * 注册自定义TABLEFFUNC方法到env上 - * TODO 对User-Defined Aggregate Functions的支持 + * * @param classPath * @param funcName * @param tableEnv */ - public static void registerTableUDF(String classPath, String funcName, TableEnvironment tableEnv, - ClassLoader classLoader){ + public static void registerTableUDF(String classPath, String funcName, TableEnvironment tableEnv, ClassLoader classLoader){ try { TableFunction udfFunc = Class.forName(classPath, false, classLoader) .asSubclass(TableFunction.class).newInstance(); - if(tableEnv instanceof StreamTableEnvironment){ ((StreamTableEnvironment)tableEnv).registerFunction(funcName, udfFunc); }else if(tableEnv instanceof BatchTableEnvironment){ @@ -212,6 +220,31 @@ public static void registerTableUDF(String classPath, String funcName, TableEnvi } } + /** + * 注册自定义Aggregate FUNC方法到env上 + * + * @param classPath + * @param funcName + * @param tableEnv + */ + public static void registerAggregateUDF(String classPath, String funcName, TableEnvironment tableEnv, ClassLoader classLoader) { + try { + AggregateFunction udfFunc = Class.forName(classPath, false, classLoader) + .asSubclass(AggregateFunction.class).newInstance(); + if (tableEnv instanceof StreamTableEnvironment) { + ((StreamTableEnvironment) tableEnv).registerFunction(funcName, udfFunc); + } else if (tableEnv 
instanceof BatchTableEnvironment) { + ((BatchTableEnvironment) tableEnv).registerFunction(funcName, udfFunc); + } else { + throw new RuntimeException("no support tableEnvironment class for " + tableEnv.getClass().getName()); + } + + logger.info("register Aggregate function:{} success.", funcName); + } catch (Exception e) { + logger.error("", e); + throw new RuntimeException("register Aggregate UDF exception:", e); + } + } /** * @@ -276,4 +309,4 @@ public static TypeInformation[] transformTypes(Class[] fieldTypes){ return types; } -} +} \ No newline at end of file diff --git a/core/src/main/java/com/dtstack/flink/sql/util/JDBCUtils.java b/core/src/main/java/com/dtstack/flink/sql/util/JDBCUtils.java index a63fc0ab4..fde2f166e 100644 --- a/core/src/main/java/com/dtstack/flink/sql/util/JDBCUtils.java +++ b/core/src/main/java/com/dtstack/flink/sql/util/JDBCUtils.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package com.dtstack.flink.sql.util; import org.slf4j.Logger; diff --git a/core/src/main/java/com/dtstack/flink/sql/util/ParseUtils.java b/core/src/main/java/com/dtstack/flink/sql/util/ParseUtils.java index 25613d73d..73f0e9baa 100644 --- a/core/src/main/java/com/dtstack/flink/sql/util/ParseUtils.java +++ b/core/src/main/java/com/dtstack/flink/sql/util/ParseUtils.java @@ -1,13 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + package com.dtstack.flink.sql.util; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; -import org.apache.calcite.sql.SqlNode; +import com.google.common.collect.Lists; +import org.apache.calcite.sql.*; +import org.apache.calcite.sql.fun.SqlCase; +import org.apache.calcite.sql.parser.SqlParserPos; import org.apache.commons.lang3.StringUtils; - -import java.util.HashSet; import java.util.List; -import java.util.Set; +import java.util.Map; + +import static org.apache.calcite.sql.SqlKind.*; /** * @Auther: jiangjunjie @@ -15,6 +52,56 @@ * @Description: */ public class ParseUtils { + public static void parseSideWhere(SqlNode whereNode, Map physicalFields, List whereConditionList) { + SqlKind sqlKind = whereNode.getKind(); + if ((sqlKind == SqlKind.OR || sqlKind == SqlKind.AND) && ((SqlBasicCall) whereNode).getOperandList().size() == 2) { + SqlNode[] sqlOperandsList = ((SqlBasicCall) whereNode).getOperands(); + // whereNode是一颗先解析or再解析and的二叉树。二叉树中序遍历,先左子树,其次中间节点,最后右子树 + parseSideWhere(sqlOperandsList[0], physicalFields, whereConditionList); + whereConditionList.add(sqlKind.name()); + parseSideWhere(sqlOperandsList[1], physicalFields, whereConditionList); + } else { + SqlIdentifier sqlIdentifier = (SqlIdentifier) ((SqlBasicCall) whereNode).getOperands()[0]; + String fieldName = null; + if (sqlIdentifier.names.size() == 1) { + fieldName = sqlIdentifier.getComponent(0).getSimple(); + } else { + fieldName = sqlIdentifier.getComponent(1).getSimple(); + } + if (physicalFields.containsKey(fieldName)) { + String sideFieldName = physicalFields.get(fieldName); + // clone SqlIdentifier node + SqlParserPos sqlParserPos = new SqlParserPos(0, 0); + SqlIdentifier sqlIdentifierClone = new SqlIdentifier("", null, sqlParserPos); + List namesClone = Lists.newArrayList(); + for(String name :sqlIdentifier.names){ + namesClone.add(name); + } + 
sqlIdentifierClone.setNames(namesClone,null); + // clone SqlBasicCall node + SqlBasicCall sqlBasicCall = (SqlBasicCall)whereNode; + SqlNode[] sqlNodes = sqlBasicCall.getOperands(); + SqlNode[] sqlNodesClone = new SqlNode[sqlNodes.length]; + for (int i = 0; i < sqlNodes.length; i++) { + sqlNodesClone[i] = sqlNodes[i]; + } + SqlBasicCall sqlBasicCallClone = new SqlBasicCall(sqlBasicCall.getOperator(), sqlNodesClone, sqlParserPos); + // 替换维表中真实字段名 + List names = Lists.newArrayList(); + names.add(sideFieldName); + sqlIdentifierClone.setNames(names, null); + + sqlBasicCallClone.setOperand(0, sqlIdentifierClone); + whereConditionList.add(sqlBasicCallClone.toString()); + } else { + // 如果字段不是维表中字段,删除字段前的链接符 + if (whereConditionList.size() >= 1) { + whereConditionList.remove(whereConditionList.size() - 1); + } + } + } + } + public static void parseAnd(SqlNode conditionNode, List sqlNodeList){ if(conditionNode.getKind() == SqlKind.AND && ((SqlBasicCall)conditionNode).getOperandList().size()==2){ parseAnd(((SqlBasicCall)conditionNode).getOperands()[0], sqlNodeList); @@ -32,16 +119,186 @@ public static void parseJoinCompareOperate(SqlNode condition, List sqlJo parseJoinCompareOperate(sqlNode, sqlJoinCompareOperate); } } else { - String operator = parseOperator(joinCondition.getKind()); + String operator = transformNotEqualsOperator(joinCondition.getKind()); sqlJoinCompareOperate.add(operator); } } - public static String parseOperator(SqlKind sqlKind) { + public static String transformNotEqualsOperator(SqlKind sqlKind) { if (StringUtils.equalsIgnoreCase(sqlKind.toString(), "NOT_EQUALS")){ return "!="; } return sqlKind.sql; } + public static SqlNode replaceJoinConditionTabName(SqlNode conditionNode, Map mappingTable) { + SqlNode[] operands = ((SqlBasicCall) conditionNode).getOperands(); + + for (int i = 0; i < operands.length; i++) { + SqlNode sqlNode = operands[i]; + SqlNode replaceNode = replaceNodeInfo(sqlNode, mappingTable); + operands[i] = replaceNode; + } + return 
conditionNode; + } + + /** + * m.id covert m_x_0.id + * @param selectNode + * @param mapTab + * @return + */ + public static SqlNode replaceSelectFieldTabName(SqlNode selectNode, Map mapTab) { + if (selectNode.getKind() == AS) { + SqlNode leftNode = ((SqlBasicCall) selectNode).getOperands()[0]; + SqlNode replaceNode = replaceSelectFieldTabName(leftNode, mapTab); + if (replaceNode != null) { + ((SqlBasicCall) selectNode).getOperands()[0] = replaceNode; + } + + return selectNode; + }else if(selectNode.getKind() == IDENTIFIER){ + SqlIdentifier sqlIdentifier = (SqlIdentifier) selectNode; + + if(sqlIdentifier.names.size() == 1){ + return selectNode; + } + + String newTableName = ParseUtils.getRootName(mapTab, sqlIdentifier.getComponent(0).getSimple()); + + if(newTableName == null){ + return selectNode; + } + sqlIdentifier = sqlIdentifier.setName(0, newTableName); + return sqlIdentifier; + + }else if(selectNode.getKind() == LITERAL || selectNode.getKind() == LITERAL_CHAIN){//字面含义 + return selectNode; + }else if( AGGREGATE.contains(selectNode.getKind()) + || AVG_AGG_FUNCTIONS.contains(selectNode.getKind()) + || COMPARISON.contains(selectNode.getKind()) + || selectNode.getKind() == OTHER_FUNCTION + || selectNode.getKind() == DIVIDE + || selectNode.getKind() == CAST + || selectNode.getKind() == TRIM + || selectNode.getKind() == TIMES + || selectNode.getKind() == PLUS + || selectNode.getKind() == NOT_IN + || selectNode.getKind() == OR + || selectNode.getKind() == AND + || selectNode.getKind() == MINUS + || selectNode.getKind() == TUMBLE + || selectNode.getKind() == TUMBLE_START + || selectNode.getKind() == TUMBLE_END + || selectNode.getKind() == SESSION + || selectNode.getKind() == SESSION_START + || selectNode.getKind() == SESSION_END + || selectNode.getKind() == HOP + || selectNode.getKind() == HOP_START + || selectNode.getKind() == HOP_END + || selectNode.getKind() == BETWEEN + || selectNode.getKind() == IS_NULL + || selectNode.getKind() == IS_NOT_NULL + || 
selectNode.getKind() == CONTAINS + || selectNode.getKind() == TIMESTAMP_ADD + || selectNode.getKind() == TIMESTAMP_DIFF + + ){ + SqlBasicCall sqlBasicCall = (SqlBasicCall) selectNode; + for(int i=0; i mapTab, SqlNodeList thenOperands) { + for(int i=0; i mapTab) { + if (parseNode.getKind() == IDENTIFIER) { + SqlIdentifier sqlIdentifier = (SqlIdentifier) parseNode; + + String newTableName = ParseUtils.getRootName(mapTab, sqlIdentifier.getComponent(0).getSimple());; + + if (newTableName == null || sqlIdentifier.names.size() == 1) { + return sqlIdentifier; + } + sqlIdentifier = sqlIdentifier.setName(0, newTableName); + return sqlIdentifier; + } else if (parseNode instanceof SqlBasicCall) { + SqlBasicCall sqlBasicCall = (SqlBasicCall) parseNode; + for (int i = 0; i < sqlBasicCall.getOperandList().size(); i++) { + SqlNode sqlNode = sqlBasicCall.getOperandList().get(i); + SqlNode replaceNode = replaceSelectFieldTabName(sqlNode, mapTab); + sqlBasicCall.getOperands()[i] = replaceNode; + } + + return sqlBasicCall; + } else { + return parseNode; + } + } + + + public static String getRootName(Map maps, String key) { + String res = null; + while (maps.get(key) !=null) { + res = maps.get(key); + key = res; + } + return res; + } + + public static void parseLeftNodeTableName(SqlNode leftJoin, List tablesName) { + if (leftJoin.getKind() == IDENTIFIER) { + SqlIdentifier sqlIdentifier = (SqlIdentifier) leftJoin; + tablesName.add(sqlIdentifier.names.get(0)); + } else if (leftJoin.getKind() == AS) { + SqlNode sqlNode = ((SqlBasicCall) leftJoin).getOperands()[1]; + tablesName.add(sqlNode.toString()); + } else if (leftJoin.getKind() == JOIN) { + parseLeftNodeTableName(((SqlJoin) leftJoin).getLeft(), tablesName); + parseLeftNodeTableName(((SqlJoin) leftJoin).getRight(), tablesName); + } + } } diff --git a/core/src/main/java/com/dtstack/flink/sql/util/PluginUtil.java b/core/src/main/java/com/dtstack/flink/sql/util/PluginUtil.java index 682df169e..f1c26b047 100644 --- 
a/core/src/main/java/com/dtstack/flink/sql/util/PluginUtil.java +++ b/core/src/main/java/com/dtstack/flink/sql/util/PluginUtil.java @@ -22,10 +22,10 @@ import com.dtstack.flink.sql.classloader.DtClassLoader; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonGenerationException; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonParseException; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonMappingException; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.core.JsonGenerationException; +import com.fasterxml.jackson.core.JsonParseException; +import com.fasterxml.jackson.databind.JsonMappingException; +import com.fasterxml.jackson.databind.ObjectMapper; import java.io.ByteArrayInputStream; import java.io.File; @@ -33,6 +33,8 @@ import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.Properties; @@ -108,19 +110,37 @@ public static Properties stringToProperties(String str) throws IOException{ } public static URL getRemoteJarFilePath(String pluginType, String tableType, String remoteSqlRootDir, String localSqlPluginPath) throws Exception { + return buildFinalJarFilePath(pluginType, tableType, remoteSqlRootDir, localSqlPluginPath); + } + + public static URL getLocalJarFilePath(String pluginType, String tableType, String localSqlPluginPath) throws Exception { + return buildFinalJarFilePath(pluginType, tableType, null, localSqlPluginPath); + } + + public static URL buildFinalJarFilePath(String pluginType, String tableType, String remoteSqlRootDir, String localSqlPluginPath) throws Exception { String dirName = pluginType + tableType.toLowerCase(); String prefix = String.format("%s-%s", pluginType, tableType.toLowerCase()); String jarPath = localSqlPluginPath + SP + 
dirName; String jarName = getCoreJarFileName(jarPath, prefix); - return new URL("file:" + remoteSqlRootDir + SP + dirName + SP + jarName); + String sqlRootDir = remoteSqlRootDir == null ? localSqlPluginPath : remoteSqlRootDir; + return new URL("file:" + sqlRootDir + SP + dirName + SP + jarName); } public static URL getRemoteSideJarFilePath(String pluginType, String sideOperator, String tableType, String remoteSqlRootDir, String localSqlPluginPath) throws Exception { + return buildFinalSideJarFilePath(pluginType, sideOperator, tableType, remoteSqlRootDir, localSqlPluginPath); + } + + public static URL getLocalSideJarFilePath(String pluginType, String sideOperator, String tableType, String localSqlPluginPath) throws Exception { + return buildFinalSideJarFilePath(pluginType, sideOperator, tableType, null, localSqlPluginPath); + } + + public static URL buildFinalSideJarFilePath(String pluginType, String sideOperator, String tableType, String remoteSqlRootDir, String localSqlPluginPath) throws Exception { String dirName = pluginType + sideOperator + tableType.toLowerCase(); String prefix = String.format("%s-%s-%s", pluginType, sideOperator, tableType.toLowerCase()); String jarPath = localSqlPluginPath + SP + dirName; String jarName = getCoreJarFileName(jarPath, prefix); - return new URL("file:" + remoteSqlRootDir + SP + dirName + SP + jarName); + String sqlRootDir = remoteSqlRootDir == null ? 
localSqlPluginPath : remoteSqlRootDir; + return new URL("file:" + sqlRootDir + SP + dirName + SP + jarName); } public static String upperCaseFirstChar(String str){ @@ -144,6 +164,25 @@ public static void addPluginJar(String pluginDir, DtClassLoader classLoader) thr } } + public static URL[] getPluginJarUrls(String pluginDir) throws MalformedURLException { + List urlList = new ArrayList<>(); + File dirFile = new File(pluginDir); + if(!dirFile.exists() || !dirFile.isDirectory()){ + throw new RuntimeException("plugin path:" + pluginDir + "is not exist."); + } + + File[] files = dirFile.listFiles(tmpFile -> tmpFile.isFile() && tmpFile.getName().endsWith(JAR_SUFFIX)); + if(files == null || files.length == 0){ + throw new RuntimeException("plugin path:" + pluginDir + " is null."); + } + + for(File file : files){ + URL pluginJarURL = file.toURI().toURL(); + urlList.add(pluginJarURL); + } + return urlList.toArray(new URL[urlList.size()]); + } + public static String getCoreJarFileName (String path, String prefix) throws Exception { String coreJarFileName = null; File pluginDir = new File(path); diff --git a/core/src/main/java/com/dtstack/flink/sql/util/PropertiesUtils.java b/core/src/main/java/com/dtstack/flink/sql/util/PropertiesUtils.java new file mode 100644 index 000000000..dcb2a081a --- /dev/null +++ b/core/src/main/java/com/dtstack/flink/sql/util/PropertiesUtils.java @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.util; + +import java.util.Properties; + +public class PropertiesUtils { + public static Properties propertiesTrim(Properties confProperties) { + Properties properties = new Properties(); + confProperties.forEach( + (k, v) -> { + properties.put(k.toString().trim(), v.toString().trim()); + } + ); + return properties; + } +} diff --git a/core/src/main/java/com/dtstack/flink/sql/watermarker/AbsCustomerWaterMarker.java b/core/src/main/java/com/dtstack/flink/sql/watermarker/AbsCustomerWaterMarker.java index b990bdd82..c9fbc0f44 100644 --- a/core/src/main/java/com/dtstack/flink/sql/watermarker/AbsCustomerWaterMarker.java +++ b/core/src/main/java/com/dtstack/flink/sql/watermarker/AbsCustomerWaterMarker.java @@ -1,3 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + package com.dtstack.flink.sql.watermarker; import com.dtstack.flink.sql.metric.EventDelayGauge; diff --git a/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java b/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java index 4f386d75f..d444c1bac 100644 --- a/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java +++ b/core/src/main/java/com/dtstack/flink/sql/watermarker/WaterMarkerAssigner.java @@ -21,13 +21,10 @@ package com.dtstack.flink.sql.watermarker; import com.dtstack.flink.sql.table.SourceTableInfo; -import com.google.common.collect.Lists; -import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.base.Strings; +import com.google.common.base.Strings; import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor; import org.apache.flink.streaming.api.windowing.time.Time; import org.apache.flink.types.Row; import org.apache.flink.util.Preconditions; diff --git a/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java b/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java index 8eb09ee18..3bab778ae 100644 --- a/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java +++ b/core/src/test/java/com/dtstack/flink/sql/side/SideSqlExecTest.java @@ -22,12 +22,10 @@ import com.dtstack.flink.sql.Main; import com.dtstack.flink.sql.parser.SqlParser; import com.dtstack.flink.sql.parser.SqlTree; -import org.apache.flink.calcite.shaded.com.google.common.base.Charsets; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.base.Charsets; +import com.google.common.collect.Lists; import org.junit.Test; -import java.io.UnsupportedEncodingException; -import 
java.net.URLDecoder; import java.net.URLEncoder; import java.util.List; diff --git a/core/src/test/java/com/dtstack/flink/sql/side/TestSideSqlParser.java b/core/src/test/java/com/dtstack/flink/sql/side/TestSideSqlParser.java index 01e33a3b6..18a7d2d86 100644 --- a/core/src/test/java/com/dtstack/flink/sql/side/TestSideSqlParser.java +++ b/core/src/test/java/com/dtstack/flink/sql/side/TestSideSqlParser.java @@ -21,7 +21,7 @@ package com.dtstack.flink.sql.side; import org.apache.calcite.sql.parser.SqlParseException; -import org.apache.flink.calcite.shaded.com.google.common.collect.Sets; +import com.google.common.collect.Sets; import org.junit.Test; import java.util.Set; diff --git a/docs/clickhouseSide.md b/docs/clickhouseSide.md new file mode 100644 index 000000000..63d3cc3da --- /dev/null +++ b/docs/clickhouseSide.md @@ -0,0 +1,85 @@ + +## 1.格式: +``` + CREATE TABLE tableName( + colName cloType, + ... + PRIMARY KEY(keyInfo), + PERIOD FOR SYSTEM_TIME + )WITH( + type='clickhouse', + url='jdbcUrl', + userName='dbUserName', + password='dbPwd', + tableName='tableName', + cache ='LRU', + cacheSize ='10000', + cacheTTLMs ='60000', + parallelism ='1', + partitionedJoin='false' + ); +``` + +# 2.支持版本 + 19.14.x、19.15.x、19.16.x + +## 3.表结构定义 + + |参数名称|含义| + |----|---| + | tableName | clickhouse表名称| + | colName | 列名称| + | colType | 列类型 [colType支持的类型](colType.md)| + | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| + | PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开| + +## 4.参数 + + |参数名称|含义|是否必填|默认值| + |----|---|---|----| + | type | 表明维表的类型 clickhouse |是|| + | url | 连接clickhouse数据库 jdbcUrl |是|| + | userName | clickhouse连接用户名 |是|| + | password | clickhouse连接密码|是|| + | tableName | clickhouse表名称|是|| + | tableName | clickhouse 的表名称|是|| + | cache | 维表缓存策略(NONE/LRU)|否|NONE| + | partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)|否|false| + + ---------- + > 缓存策略 + * NONE: 不做内存缓存 + * LRU: + * cacheSize: 缓存的条目数量 + * cacheTTLMs:缓存的过期时间(ms) + * cacheMode: (unordered|ordered)异步加载是有序还是无序,默认有序。 
+ * asyncCapacity:异步请求容量,默认1000 + * asyncTimeout:异步请求超时时间,默认10000毫秒 + +## 5.样例 +``` +create table sideTable( + channel varchar, + xccount int, + PRIMARY KEY(channel), + PERIOD FOR SYSTEM_TIME + )WITH( + type='clickhouse', + url='jdbc:clickhouse://172.16.8.104:3306/test?charset=utf8', + userName='dtstack', + password='abc123', + tableName='sidetest', + cache ='LRU', + cacheSize ='10000', + cacheTTLMs ='60000', + cacheMode='unordered', + asyncCapacity='1000', + asyncTimeout='10000' + parallelism ='1', + partitionedJoin='false' + ); + + +``` + + diff --git a/docs/clickhouseSink.md b/docs/clickhouseSink.md new file mode 100644 index 000000000..d9774727f --- /dev/null +++ b/docs/clickhouseSink.md @@ -0,0 +1,53 @@ +## 1.格式: +``` +CREATE TABLE tableName( + colName colType, + ... + colNameX colType + )WITH( + type ='clickhouse', + url ='jdbcUrl', + userName ='userName', + password ='pwd', + tableName ='tableName', + parallelism ='parllNum' + ); + +``` + +## 2.支持版本 + 19.14.x、19.15.x、19.16.x + +## 3.表结构定义 + +|参数名称|含义| +|----|---| +| tableName| clickhouse表名称| +| colName | 列名称| +| colType | 列类型 [colType支持的类型](colType.md)| + +## 4.参数: + +|参数名称|含义|是否必填|默认值| +|----|----|----|----| +|type |表明 输出表类型 clickhouse |是|| +|url | 连接clickhouse 数据库 jdbcUrl |是|| +|userName | clickhouse 连接用户名 |是|| +| password | clickhouse 连接密码|是|| +| tableName | clickhouse 表名称|是|| +| parallelism | 并行度设置|否|1| + +## 5.样例: +``` +CREATE TABLE MyResult( + channel VARCHAR, + pv VARCHAR + )WITH( + type ='clickhouse', + url ='jdbc:clickhouse://172.16.8.104:3306/test?charset=utf8', + userName ='dtstack', + password ='abc123', + tableName ='pv2', + parallelism ='1' + ) + ``` \ No newline at end of file diff --git a/docs/elasticsearchSink.md b/docs/elasticsearchSink.md index 9a406245a..69d69af75 100644 --- a/docs/elasticsearchSink.md +++ b/docs/elasticsearchSink.md @@ -27,7 +27,7 @@ CREATE TABLE tableName( ## 4.参数: |参数名称|含义|是否必填|默认值| |----|---|---|----| -|type|表明 输出表类型[mysql\|hbase\|elasticsearch]|是|| +|type|表明 
输出表类型[mysq|hbase|elasticsearch]|是|| |address | 连接ES Transport地址(tcp地址)|是|| |cluster | ES 集群名称 |是|| |index | 选择的ES上的index名称|是|| diff --git a/docs/hbaseSide.md b/docs/hbaseSide.md index 07b03026a..0c4e545f9 100644 --- a/docs/hbaseSide.md +++ b/docs/hbaseSide.md @@ -35,7 +35,7 @@ |参数名称|含义|是否必填|默认值| |----|---|---|----| -| type | 表明维表的类型[hbase\|mysql]|是|| +| type | 表明维表的类型[hbase|mysql]|是|| | zookeeperQuorum | hbase 的zk地址;格式ip:port[;ip:port]|是|| | zookeeperParent | hbase 的zk parent路径|是|| | tableName | hbase 的表名称|是|| diff --git a/docs/hbaseSink.md b/docs/hbaseSink.md index b41abb281..c6a15e766 100644 --- a/docs/hbaseSink.md +++ b/docs/hbaseSink.md @@ -30,7 +30,7 @@ hbase2.0 |参数名称|含义|是否必填|默认值| |----|---|---|-----| -|type | 表明 输出表类型[mysql\|hbase\|elasticsearch]|是|| +|type | 表明 输出表类型[mysq|hbase|elasticsearch]|是|| |zookeeperQuorum | hbase zk地址,多个直接用逗号隔开|是|| |zookeeperParent | zkParent 路径|是|| |tableName | 关联的hbase表名称|是|| diff --git a/docs/kafkaSource.md b/docs/kafkaSource.md index a78239421..580eda6b4 100644 --- a/docs/kafkaSource.md +++ b/docs/kafkaSource.md @@ -1,4 +1,3 @@ -# 一、json格式数据源 ## 1.格式: ``` 数据现在支持json格式{"xx":"bb","cc":"dd"} @@ -22,7 +21,7 @@ CREATE TABLE tableName( ``` ## 2.支持的版本 - kafka08,kafka09,kafka10,kafka11 + kafka08,kafka09,kafka10,kafka11及以上版本 **kafka读取和写入的版本必须一致,否则会有兼容性错误。** ## 3.表结构定义 @@ -33,19 +32,19 @@ CREATE TABLE tableName( | colName | 列名称| | colType | 列类型 [colType支持的类型](colType.md)| | function(colNameX) as aliasName | 支持在定义列信息的时候根据已有列类型生成新的列(函数可以使用系统函数和已经注册的UDF)| -| WATERMARK FOR colName AS withOffset( colName , delayTime ) | 标识输入流生的watermake生成规则,根据指定的colName(当前支持列的类型为Long \| Timestamp) 和delayTime生成waterMark 同时会在注册表的使用附带上rowtime字段(如果未指定则默认添加proctime字段);注意:添加该标识的使用必须设置系统参数 time.characteristic:EventTime; delayTime: 数据最大延迟时间(ms)| +| WATERMARK FOR colName AS withOffset( colName , delayTime ) | 标识输入流生的watermake生成规则,根据指定的colName(当前支持列的类型为Long | Timestamp) 和delayTime生成waterMark 同时会在注册表的使用附带上rowtime字段(如果未指定则默认添加proctime字段);注意:添加该标识的使用必须设置系统参数 
time.characteristic:EventTime; delayTime: 数据最大延迟时间(ms)| ## 4.参数: |参数名称|含义|是否必填|默认值| |----|---|---|---| -|type | kafka09 | 是|| +|type | kafka09 | 是|kafka08、kafka09、kafka10、kafka11、kafka(对应kafka1.0及以上版本)| |kafka.group.id | 需要读取的 groupId 名称|否|| |kafka.bootstrap.servers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| |kafka.zookeeper.quorum | kafka zk地址信息(多个之间用逗号分隔)|是|| |kafka.topic | 需要读取的 topic 名称|是|| -|patterntopic | topic是否是正则表达式格式(true|false) |否| false -|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest\|earliest\|指定offset值({"0":12312,"1":12321,"2":12312},{"partition_no":offset_value})]|否|latest| +|patterntopic | topic是否是正则表达式格式(true|false) |否| false +|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest|earliest|指定offset值({"0":12312,"1":12321,"2":12312},{"partition_no":offset_value})]|否|latest| |parallelism | 并行度设置|否|1| |sourcedatatype | 数据类型|否|json| |timezone|时区设置[timezone支持的参数](timeZone.md)|否|'Asia/Shanghai' @@ -71,6 +70,75 @@ CREATE TABLE MyTable( sourcedatatype ='json' #可不设置 ); ``` +## 6.支持嵌套json、数据类型字段解析 + +嵌套json解析示例 + +json: {"name":"tom", "obj":{"channel": "root"}, "pv": 4, "xctime":1572932485} +``` +CREATE TABLE MyTable( + name varchar, + obj.channel varchar as channel, + pv INT, + xctime bigint, + CHARACTER_LENGTH(channel) AS timeLeng + )WITH( + type ='kafka09', + bootstrapServers ='172.16.8.198:9092', + zookeeperQuorum ='172.16.8.198:2181/kafka', + offsetReset ='latest', + groupId='nbTest', + topic ='nbTest1,nbTest2,nbTest3', + --- topic ='mqTest.*', + ---topicIsPattern='true', + parallelism ='1' + ); +``` + +数组类型字段解析示例 + +json: {"name":"tom", "obj":{"channel": "root"}, "user": [{"pv": 4}, {"pv": 10}], "xctime":1572932485} +``` +CREATE TABLE MyTable( + name varchar, + obj.channel varchar as channel, + user[1].pv INT as pv, + xctime bigint, + CHARACTER_LENGTH(channel) AS timeLeng + )WITH( + type ='kafka09', + bootstrapServers ='172.16.8.198:9092', + zookeeperQuorum ='172.16.8.198:2181/kafka', + offsetReset ='latest', + groupId='nbTest', + topic 
='nbTest1,nbTest2,nbTest3', + --- topic ='mqTest.*', + ---topicIsPattern='true', + parallelism ='1' + ); +``` +or + +json: {"name":"tom", "obj":{"channel": "root"}, "pv": [4, 7, 10], "xctime":1572932485} +``` +CREATE TABLE MyTable( + name varchar, + obj.channel varchar as channel, + pv[1] INT as pv, + xctime bigint, + CHARACTER_LENGTH(channel) AS timeLeng + )WITH( + type ='kafka09', + bootstrapServers ='172.16.8.198:9092', + zookeeperQuorum ='172.16.8.198:2181/kafka', + offsetReset ='latest', + groupId='nbTest', + topic ='nbTest1,nbTest2,nbTest3', + --- topic ='mqTest.*', + ---topicIsPattern='true', + parallelism ='1' + ); +``` # 二、csv格式数据源 根据字段分隔符进行数据分隔,按顺序匹配sql中配置的列。如数据分隔列数和sql中配置的列数相等直接匹配;如不同参照lengthcheckpolicy策略处理。 ## 1.参数: @@ -81,7 +149,7 @@ CREATE TABLE MyTable( |kafka.bootstrap.servers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| |kafka.zookeeper.quorum | kafka zk地址信息(多个之间用逗号分隔)|是|| |kafka.topic | 需要读取的 topic 名称|是|| -|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest\|earliest]|否|latest| +|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest|earliest]|否|latest| |parallelism | 并行度设置 |否|1| |sourcedatatype | 数据类型|是 |csv| |fielddelimiter | 字段分隔符|是 || @@ -140,7 +208,7 @@ create table kafka_stream( |kafka.bootstrap.servers | kafka bootstrap-server 地址信息(多个用逗号隔开)|是|| |kafka.zookeeper.quorum | kafka zk地址信息(多个之间用逗号分隔)|是|| |kafka.topic | 需要读取的 topic 名称|是|| -|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest\|earliest]|否|latest| +|kafka.auto.offset.reset | 读取的topic 的offset初始位置[latest|earliest]|否|latest| |parallelism | 并行度设置|否|1| |sourcedatatype | 数据类型|否|text| **kafka相关参数可以自定义,使用kafka.开头即可。** diff --git a/docs/kuduSide.md b/docs/kuduSide.md new file mode 100644 index 000000000..5a73596ad --- /dev/null +++ b/docs/kuduSide.md @@ -0,0 +1,127 @@ + +## 1.格式: +All: +``` +create table sideTable( + id int, + tablename1 VARCHAR, + PRIMARY KEY(id), + PERIOD FOR SYSTEM_TIME + )WITH( + type='kudu', + kuduMasters ='ip1,ip2,ip3', + tableName ='impala::default.testSide', + cache 
='ALL', + primaryKey='id,xx', + lowerBoundPrimaryKey='10,xx', + upperBoundPrimaryKey='15,xx', + workerCount='1', + defaultOperationTimeoutMs='600000', + defaultSocketReadTimeoutMs='6000000', + batchSizeBytes='100000000', + limitNum='1000', + isFaultTolerant='false', + partitionedJoin='false' + ); +``` +LRU: +``` +create table sideTable( + id int, + tablename1 VARCHAR, + PRIMARY KEY(id), + PERIOD FOR SYSTEM_TIME + )WITH( + type='kudu', + kuduMasters ='ip1,ip2,ip3', + tableName ='impala::default.testSide', + cache ='LRU', + workerCount='1', + defaultOperationTimeoutMs='600000', + defaultSocketReadTimeoutMs='6000000', + batchSizeBytes='100000000', + limitNum='1000', + isFaultTolerant='false', + partitionedJoin='false' + ); + ``` + +## 2.支持版本 +kudu 1.9.0+cdh6.2.0 + +## 3.表结构定义 + + |参数名称|含义| + |----|---| + | tableName | 注册到flink的表名称(可选填;不填默认和hbase对应的表名称相同)| + | colName | 列名称| + | colType | 列类型 [colType支持的类型](colType.md)| + | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| + | PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开| + +## 3.参数 + + +|参数名称|含义|是否必填|默认值| +|----|---|---|-----| +|type | 表明维表的类型[hbase|mysql|kudu]|是|| +| kuduMasters | kudu master节点的地址;格式ip[ip,ip2]|是|| +| tableName | kudu 的表名称|是|| +| workerCount | 工作线程数 |否|| +| defaultOperationTimeoutMs | 写入操作超时时间 |否|| +| defaultSocketReadTimeoutMs | socket读取超时时间 |否|| +| primaryKey | 需要过滤的主键 ALL模式独有 |否|| +| lowerBoundPrimaryKey | 需要过滤的主键的最小值 ALL模式独有 |否|| +| upperBoundPrimaryKey | 需要过滤的主键的最大值(不包含) ALL模式独有 |否|| +| workerCount | 工作线程数 |否|| +| defaultOperationTimeoutMs | 写入操作超时时间 |否|| +| defaultSocketReadTimeoutMs | socket读取超时时间 |否|| +| batchSizeBytes |返回数据的大小 | 否|| +| limitNum |返回数据的条数 | 否|| +| isFaultTolerant |查询是否容错 查询失败是否扫描第二个副本 默认false 容错 | 否|| +| cache | 维表缓存策略(NONE/LRU/ALL)|否|NONE| +| partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)|否|false| + + +-------------- +> 缓存策略 + * NONE: 不做内存缓存 + * LRU: + * cacheSize: 缓存的条目数量 + * cacheTTLMs:缓存的过期时间(ms) + +## 4.样例 +All: +``` +create table sideTable( + id int, + tablename1 
VARCHAR, + PRIMARY KEY(id), + PERIOD FOR SYSTEM_TIME + )WITH( + type='kudu', + kuduMasters ='ip1,ip2,ip3', + tableName ='impala::default.testSide', + cache ='ALL', + primaryKey='id,xx', + lowerBoundPrimaryKey='10,xx', + upperBoundPrimaryKey='15,xx', + partitionedJoin='false' + ); +``` +LRU: +``` +create table sideTable( + id int, + tablename1 VARCHAR, + PRIMARY KEY(id), + PERIOD FOR SYSTEM_TIME + )WITH( + type='kudu', + kuduMasters ='ip1,ip2,ip3', + tableName ='impala::default.testSide', + cache ='LRU', + partitionedJoin='false' + ); + ``` + diff --git a/docs/kuduSink.md b/docs/kuduSink.md new file mode 100644 index 000000000..990dfdd7d --- /dev/null +++ b/docs/kuduSink.md @@ -0,0 +1,62 @@ +## 1.格式: +``` +CREATE TABLE tableName( + colName colType, + ... + colNameX colType + )WITH( + type ='kudu', + kuduMasters ='ip1,ip2,ip3', + tableName ='impala::default.test', + writeMode='upsert', + workerCount='1', + defaultOperationTimeoutMs='600000', + defaultSocketReadTimeoutMs='6000000', + parallelism ='parllNum' + ); + + +``` + +## 2.支持版本 +kudu 1.9.0+cdh6.2.0 + +## 3.表结构定义 + +|参数名称|含义| +|----|---| +| tableName | 在 sql 中使用的名称;即注册到flink-table-env上的名称 +| colName | 列名称,redis中存储为 表名:主键名:主键值:列名]| +| colType | 列类型 [colType支持的类型](colType.md)| + + +## 4.参数: + +|参数名称|含义|是否必填|默认值| +|----|---|---|-----| +|type | 表名 输出表类型[mysq|hbase|elasticsearch|redis|kudu]|是|| +| kuduMasters | kudu master节点的地址;格式ip[ip,ip2]|是|| +| tableName | kudu 的表名称|是|| +| writeMode | 写入kudu的模式 insert|update|upsert |否 |upsert +| workerCount | 工作线程数 |否| +| defaultOperationTimeoutMs | 写入操作超时时间 |否| +| defaultSocketReadTimeoutMs | socket读取超时时间 |否| +|parallelism | 并行度设置|否|1| + + +## 5.样例: +``` +CREATE TABLE MyResult( + id int, + title VARCHAR, + amount decimal, + tablename1 VARCHAR + )WITH( + type ='kudu', + kuduMasters ='localhost1,localhost2,localhost3', + tableName ='impala::default.test', + writeMode='upsert', + parallelism ='1' + ); + + ``` \ No newline at end of file diff --git a/docs/mysqlSide.md 
b/docs/mysqlSide.md index b17c72bb6..f0eb16090 100644 --- a/docs/mysqlSide.md +++ b/docs/mysqlSide.md @@ -27,7 +27,7 @@ |参数名称|含义| |----|---| - | tableName | 注册到flink的表名称(可选填;不填默认和hbase对应的表名称相同)| + | tableName | mysql表名称| | colName | 列名称| | colType | 列类型 [colType支持的类型](colType.md)| | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| @@ -37,7 +37,7 @@ |参数名称|含义|是否必填|默认值| |----|---|---|----| - | type | 表明维表的类型[hbase\|mysql] |是|| + | type | 表明维表的类型 mysql |是|| | url | 连接mysql数据库 jdbcUrl |是|| | userName | mysql连接用户名 |是|| | password | mysql连接密码|是|| diff --git a/docs/mysqlSink.md b/docs/mysqlSink.md index 192c4944a..3218f9371 100644 --- a/docs/mysqlSink.md +++ b/docs/mysqlSink.md @@ -22,7 +22,7 @@ CREATE TABLE tableName( |参数名称|含义| |----|---| -| tableName| 在 sql 中使用的名称;即注册到flink-table-env上的名称| +| tableName| mysql表名称| | colName | 列名称| | colType | 列类型 [colType支持的类型](colType.md)| @@ -30,7 +30,7 @@ CREATE TABLE tableName( |参数名称|含义|是否必填|默认值| |----|----|----|----| -|type |表明 输出表类型[mysql\|hbase\|elasticsearch]|是|| +|type |表名 输出表类型[mysq|hbase|elasticsearch]|是|| |url | 连接mysql数据库 jdbcUrl |是|| |userName | mysql连接用户名 |是|| | password | mysql连接密码|是|| diff --git a/docs/oracleSide.md b/docs/oracleSide.md new file mode 100644 index 000000000..74fc56680 --- /dev/null +++ b/docs/oracleSide.md @@ -0,0 +1,85 @@ + +## 1.格式: +``` + CREATE TABLE tableName( + colName cloType, + ... 
+ PRIMARY KEY(keyInfo), + PERIOD FOR SYSTEM_TIME + )WITH( + type='oracle', + url='jdbcUrl', + userName='dbUserName', + password='dbPwd', + tableName='tableName', + cache ='LRU', + schema = 'MQTEST', + parallelism ='1', + partitionedJoin='false' + ); +``` + +## 2.支持版本 + 10g 11g + +## 3.表结构定义 + + |参数名称|含义| + |----|---| + | tableName | oracle表名称| + | colName | 列名称| + | colType | 列类型 [colType支持的类型](colType.md)| + | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| + | PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开| + +## 4.参数 + + |参数名称|含义|是否必填|默认值| + |----|---|---|----| + | type | 表明维表的类型 oracle |是|| + | url | 连接oracle数据库 jdbcUrl |是|| + | userName | oracle连接用户名 |是|| + | password | oracle连接密码|是|| + | tableName | oracle表名称|是|| + | schema | oracle 的schema|否|当前登录用户| + | cache | 维表缓存策略(NONE/LRU)|否|NONE| + | partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)|否|false| + + ---------- + > 缓存策略 + * NONE: 不做内存缓存 + * LRU: + * cacheSize: 缓存的条目数量 + * cacheTTLMs:缓存的过期时间(ms) + * cacheMode: (unordered|ordered)异步加载是有序还是无序,默认有序。 + * asyncCapacity:异步请求容量,默认1000 + * asyncTimeout:异步请求超时时间,默认10000毫秒 + +## 5.样例 +``` +create table sideTable( + channel varchar, + xccount int, + PRIMARY KEY(channel), + PERIOD FOR SYSTEM_TIME + )WITH( + type='oracle', + url='jdbc:oracle:thin:@xx.xx.xx.xx:1521:orcl', + userName='xx', + password='xx', + tableName='sidetest', + cache ='LRU', + cacheSize ='10000', + cacheTTLMs ='60000', + cacheMode='unordered', + asyncCapacity='1000', + asyncTimeout='10000', + parallelism ='1', + partitionedJoin='false', + schema = 'MQTEST' + ); + + +``` + + diff --git a/docs/oracleSink.md b/docs/oracleSink.md new file mode 100644 index 000000000..47ddd8371 --- /dev/null +++ b/docs/oracleSink.md @@ -0,0 +1,55 @@ +## 1.格式: +``` +CREATE TABLE tableName( + colName colType, + ... 
+ colNameX colType + )WITH( + type ='oracle', + url ='jdbcUrl', + userName ='userName', + password ='pwd', + tableName ='tableName', + parallelism ='parllNum' + ); + +``` + +## 2.支持版本 + 10g 11g + +## 3.表结构定义 + +|参数名称|含义| +|----|---| +| tableName| oracle表名称| +| colName | 列名称| +| colType | 列类型 [colType支持的类型](colType.md)| + +## 4.参数: + +|参数名称|含义|是否必填|默认值| +|----|----|----|----| +|type |表明 输出表类型[mysql|hbase|elasticsearch|oracle]|是|| +|url | 连接oracle数据库 jdbcUrl |是|| +|userName | oracle连接用户名 |是|| +| password | oracle连接密码|是|| +| tableName | oracle表名称|是|| +| schema | oracle 的schema|否|当前登录用户| +| parallelism | 并行度设置|否|1| + +## 5.样例: +``` +CREATE TABLE MyResult( + channel VARCHAR, + pv VARCHAR + )WITH( + type ='oracle', + url ='jdbc:oracle:thin:@xx.xx.xx.xx:1521:orcl', + userName ='dtstack', + password ='abc123', + tableName ='pv2', + schema = 'MQTEST', + parallelism ='1' + ) + ``` \ No newline at end of file diff --git a/docs/postgresqlSide.md b/docs/postgresqlSide.md new file mode 100644 index 000000000..68d10b869 --- /dev/null +++ b/docs/postgresqlSide.md @@ -0,0 +1,80 @@ + +## 1.格式: +``` + CREATE TABLE tableName( + colName colType, + ... 
+ PRIMARY KEY(keyInfo), + PERIOD FOR SYSTEM_TIME + )WITH( + type='postgresql', + url='jdbcUrl', + userName='dbUserName', + password='dbPwd', + tableName='tableName', + cache ='LRU', + cacheSize ='10000', + cacheTTLMs ='60000', + parallelism ='1', + partitionedJoin='false' + ); +``` + +# 2.支持版本 + postgresql-8.2+ + +## 3.表结构定义 + + |参数名称|含义| + |----|---| + | tableName | 注册到flink的表名称(可选填;不填默认和hbase对应的表名称相同)| + | colName | 列名称| + | colType | 列类型 [colType支持的类型](colType.md)| + | PERIOD FOR SYSTEM_TIME | 关键字表明该定义的表为维表信息| + | PRIMARY KEY(keyInfo) | 维表主键定义;多个列之间用逗号隔开| + +## 4.参数 + + |参数名称|含义|是否必填|默认值| + |----|---|---|----| + | type | 表明维表的类型[postgresql] |是|| + | url | 连接postgresql数据库 jdbcUrl |是|| + | userName | postgresql连接用户名 |是|| + | password | postgresql连接密码|是|| + | tableName | postgresql表名称|是|| + | tableName | postgresql 的表名称|是|| + | cache | 维表缓存策略(NONE/LRU/ALL)|否|NONE| + | partitionedJoin | 是否在維表join之前先根据 設定的key 做一次keyby操作(可以減少维表的数据缓存量)|否|false| + + ---------- + > 缓存策略 + * NONE: 不做内存缓存 + * LRU: + * cacheSize: 缓存的条目数量 + * cacheTTLMs:缓存的过期时间(ms) + + +## 5.样例 +``` +create table sideTable( + channel varchar, + xccount int, + PRIMARY KEY(channel), + PERIOD FOR SYSTEM_TIME + )WITH( + type='postgresql', + url='jdbc:postgresql://localhost:9001/test?sslmode=disable', + userName='dtstack', + password='abc123', + tableName='sidetest', + cache ='LRU', + cacheSize ='10000', + cacheTTLMs ='60000', + parallelism ='1', + partitionedJoin='false' + ); + + +``` + + diff --git a/docs/postgresqlSink.md b/docs/postgresqlSink.md new file mode 100644 index 000000000..da09c4c34 --- /dev/null +++ b/docs/postgresqlSink.md @@ -0,0 +1,55 @@ +## 1.格式: +``` +CREATE TABLE tableName( + colName colType, + ... 
+ colNameX colType + )WITH( + type ='postgresql', + url ='jdbcUrl', + userName ='userName', + password ='pwd', + tableName ='tableName', + parallelism ='parllNum' + ); + +``` + +## 2.支持版本 + postgresql-8.2+ + +## 3.表结构定义 + +|参数名称|含义| +|----|---| +| tableName| 在 sql 中使用的名称;即注册到flink-table-env上的名称| +| colName | 列名称| +| colType | 列类型 [colType支持的类型](colType.md)| + +## 4.参数: + +|参数名称|含义|是否必填|默认值| +|----|----|----|----| +| type |表明 输出表类型[postgresql]|是|| +| url | 连接postgresql数据库 jdbcUrl |是|| +| userName | postgresql连接用户名 |是|| +| password | postgresql连接密码|是|| +| tableName | postgresql表名称|是|| +| parallelism | 并行度设置|否|1| +| isUpsert | 使用upsert模式插入数据(版本9.5之后才支持upsert) |否|false +| keyField | 设置更新主键字段名(isupsert为true时为必填项)|否| + +## 5.样例: +``` +CREATE TABLE MyResult( + channel VARCHAR, + pv VARCHAR + )WITH( + type ='postgresql', + url ='jdbc:postgresql://localhost:9001/test?sslmode=disable', + userName ='dtstack', + password ='abc123', + tableName ='pv2', + parallelism ='1' + ) + ``` diff --git a/docs/redisSide.md b/docs/redisSide.md index 52d637b70..9c7f4b47e 100644 --- a/docs/redisSide.md +++ b/docs/redisSide.md @@ -11,6 +11,7 @@ password = 'redisPwd', database = 'dbName', tableName ='sideTableName', + redisType = '1', cache ='LRU', cacheSize ='10000', cacheTTLMs ='60000' @@ -32,9 +33,11 @@ |参数名称|含义|是否必填|默认值| |----|---|---|----| -| type | 表明维表的类型[hbase\|mysql\|redis]|是|| +| type | 表明维表的类型[hbase|mysql|redis]|是|| | url | redis 的地址;格式ip:port[,ip:port]|是|| | password | redis 的密码 |是|| +| redisType | redis模式(1 单机,2 哨兵, 3 集群)| 是 | +| masterName | 主节点名称(哨兵模式下为必填项) | 否 | | database | reids 的数据库地址|否|| | tableName | redis 的表名称|是|| | cache | 维表缓存策略(NONE/LRU/ALL)|否|NONE| @@ -60,6 +63,7 @@ create table sideTable( url='172.16.10.79:6379', password='abc123', database='0', + redisType = '1', tableName='sidetest', cache = 'LRU', cacheTTLMs='10000' diff --git a/docs/redisSink.md b/docs/redisSink.md index a3c80914e..6a754e5c6 100644 --- a/docs/redisSink.md +++ b/docs/redisSink.md @@ -9,6 +9,7 @@
CREATE TABLE tableName( url = 'ip:port', database ='dbName', password ='pwd', + redisType='1', tableName ='tableName', parallelism ='parllNum' ); @@ -32,12 +33,14 @@ redis5.0 |参数名称|含义|是否必填|默认值| |----|---|---|-----| -|type | 表明 输出表类型[mysql\|hbase\|elasticsearch\|redis\]|是|| +| type | 表名 输出表类型[mysq|hbase|elasticsearch|redis]|是|| | url | redis 的地址;格式ip:port[,ip:port]|是|| | password | redis 的密码 |是|| +| redisType | redis模式(1 单机,2 哨兵, 3 集群)| 是 | +| masterName | 主节点名称(哨兵模式下为必填项) | 否 | | database | reids 的数据库地址|否|| | tableName | redis 的表名称|是|| -|parallelism | 并行度设置|否|1| +| parallelism | 并行度设置|否|1| ## 5.样例: @@ -51,7 +54,8 @@ redis5.0 url='172.16.10.79:6379', password='abc123', database='0', - tableName='sinktoredis', + redisType='1', + tableName='sinktoredis' ); ``` \ No newline at end of file diff --git a/elasticsearch5/elasticsearch5-sink/src/main/java/com/dtstack/flink/sql/sink/elasticsearch/table/ElasticsearchTableInfo.java b/elasticsearch5/elasticsearch5-sink/src/main/java/com/dtstack/flink/sql/sink/elasticsearch/table/ElasticsearchTableInfo.java index 19f158c3a..e616163ec 100644 --- a/elasticsearch5/elasticsearch5-sink/src/main/java/com/dtstack/flink/sql/sink/elasticsearch/table/ElasticsearchTableInfo.java +++ b/elasticsearch5/elasticsearch5-sink/src/main/java/com/dtstack/flink/sql/sink/elasticsearch/table/ElasticsearchTableInfo.java @@ -22,7 +22,7 @@ import com.dtstack.flink.sql.table.TargetTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * @date 2018/09/12 diff --git a/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java b/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java index c9adc87b9..0fe2d1720 100644 --- a/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java +++ 
b/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllReqRow.java @@ -24,7 +24,7 @@ import com.dtstack.flink.sql.side.hbase.table.HbaseSideTableInfo; import org.apache.commons.collections.map.HashedMap; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; import org.apache.flink.util.Collector; diff --git a/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllSideInfo.java b/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllSideInfo.java index dde3f41ba..ea51f46e4 100644 --- a/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllSideInfo.java +++ b/hbase/hbase-side/hbase-all-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAllSideInfo.java @@ -25,11 +25,9 @@ import com.dtstack.flink.sql.side.SideInfo; import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncSideInfo.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncSideInfo.java index 84a6358d1..2bfdd0d44 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncSideInfo.java +++ b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/HbaseAsyncSideInfo.java @@ -6,12 +6,10 @@ import 
com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.hbase.table.HbaseSideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import java.util.List; import java.util.Map; diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/AbsRowKeyModeDealer.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/AbsRowKeyModeDealer.java index 5fe61af4f..d73b31e75 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/AbsRowKeyModeDealer.java +++ b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/AbsRowKeyModeDealer.java @@ -23,7 +23,7 @@ import com.dtstack.flink.sql.side.FieldInfo; import com.dtstack.flink.sql.side.cache.AbsSideCache; import org.apache.calcite.sql.JoinType; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.types.Row; import org.hbase.async.HBaseClient; diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java index d79f67ec5..b75bca40c 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java +++ 
b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/PreRowKeyModeDealerDealer.java @@ -28,7 +28,7 @@ import com.dtstack.flink.sql.side.hbase.utils.HbaseUtils; import com.google.common.collect.Maps; import org.apache.calcite.sql.JoinType; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.types.Row; import org.hbase.async.BinaryPrefixComparator; @@ -88,6 +88,7 @@ private String dealOneRow(ArrayList> args, String rowKeyStr, } List cacheContent = Lists.newArrayList(); + List rowList = Lists.newArrayList(); for(List oneRow : args){ try { @@ -120,8 +121,7 @@ private String dealOneRow(ArrayList> args, String rowKeyStr, if (openCache) { cacheContent.add(sideVal); } - - resultFuture.complete(Collections.singleton(row)); + rowList.add(row); } } catch (Exception e) { resultFuture.complete(null); @@ -130,6 +130,10 @@ private String dealOneRow(ArrayList> args, String rowKeyStr, } } + if (rowList.size() > 0){ + resultFuture.complete(rowList); + } + if(openCache){ sideCache.putCache(rowKeyStr, CacheObj.buildCacheObj(ECacheContentType.MultiLine, cacheContent)); } diff --git a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java index fe3149e3d..8604db18e 100644 --- a/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java +++ b/hbase/hbase-side/hbase-async-side/src/main/java/com/dtstack/flink/sql/side/hbase/rowkeydealer/RowKeyEqualModeDealer.java @@ -28,7 +28,7 @@ import com.dtstack.flink.sql.side.hbase.utils.HbaseUtils; import com.google.common.collect.Maps; import org.apache.calcite.sql.JoinType; -import 
org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.types.Row; import org.hbase.async.GetRequest; diff --git a/hbase/hbase-side/hbase-side-core/src/main/java/com/dtstack/flink/sql/side/hbase/RowKeyBuilder.java b/hbase/hbase-side/hbase-side-core/src/main/java/com/dtstack/flink/sql/side/hbase/RowKeyBuilder.java index af2053fe6..114b7fa6a 100644 --- a/hbase/hbase-side/hbase-side-core/src/main/java/com/dtstack/flink/sql/side/hbase/RowKeyBuilder.java +++ b/hbase/hbase-side/hbase-side-core/src/main/java/com/dtstack/flink/sql/side/hbase/RowKeyBuilder.java @@ -21,7 +21,7 @@ package com.dtstack.flink.sql.side.hbase; import com.dtstack.flink.sql.side.hbase.enums.EReplaceType; -import org.apache.flink.shaded.curator.org.apache.curator.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.io.Serializable; import java.util.List; diff --git a/hbase/hbase-sink/src/main/java/com/dtstack/flink/sql/sink/hbase/table/HbaseTableInfo.java b/hbase/hbase-sink/src/main/java/com/dtstack/flink/sql/sink/hbase/table/HbaseTableInfo.java index 4e286fd45..ed28f781d 100644 --- a/hbase/hbase-sink/src/main/java/com/dtstack/flink/sql/sink/hbase/table/HbaseTableInfo.java +++ b/hbase/hbase-sink/src/main/java/com/dtstack/flink/sql/sink/hbase/table/HbaseTableInfo.java @@ -22,7 +22,7 @@ import com.dtstack.flink.sql.table.TargetTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; import java.util.Map; /** diff --git a/kafka/kafka-sink/pom.xml b/kafka/kafka-sink/pom.xml new file mode 100644 index 000000000..41aa899c2 --- /dev/null +++ b/kafka/kafka-sink/pom.xml @@ -0,0 +1,89 @@ + + + + sql.kafka + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.kafka + 1.0-SNAPSHOT + kafka-sink + jar + + + + + org.apache.flink + flink-json + 
${flink.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer.java b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer.java new file mode 100644 index 000000000..b7976a30e --- /dev/null +++ b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer.java @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka; + +import com.dtstack.flink.sql.metric.MetricConstant; +import org.apache.flink.api.common.functions.RuntimeContext; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.metrics.Counter; +import org.apache.flink.metrics.MeterView; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer; + +import java.util.Properties; + +/** + * @author: chuixue + * @create: 2019-11-05 11:54 + * @description: + **/ +public class CustomerFlinkKafkaProducer extends FlinkKafkaProducer { + + CustomerJsonRowSerializationSchema schema; + + public CustomerFlinkKafkaProducer(String topicId, SerializationSchema serializationSchema, Properties producerConfig) { + super(topicId, serializationSchema, producerConfig); + this.schema = (CustomerJsonRowSerializationSchema) serializationSchema; + } + + @Override + public void open(Configuration configuration) { + RuntimeContext ctx = getRuntimeContext(); + Counter counter = ctx.getMetricGroup().counter(MetricConstant.DT_NUM_RECORDS_OUT); + MeterView meter = ctx.getMetricGroup().meter(MetricConstant.DT_NUM_RECORDS_OUT_RATE, new MeterView(counter, 20)); + + schema.setCounter(counter); + + try { + super.open(configuration); + } catch (Exception e) { + throw new RuntimeException("",e); + } + } + +} diff --git a/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java new file mode 100644 index 000000000..d698d3ee2 --- /dev/null +++ b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka; + +import org.apache.flink.annotation.PublicEvolving; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo; +import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.formats.json.JsonRowDeserializationSchema; +import org.apache.flink.formats.json.JsonRowSchemaConverter; +import org.apache.flink.metrics.Counter; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ContainerNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode; +import org.apache.flink.types.Row; +import org.apache.flink.util.Preconditions; + +import java.math.BigDecimal; +import java.math.BigInteger; 
+import java.sql.Time; +import java.sql.Timestamp; +import java.text.SimpleDateFormat; + +/** + * Serialization schema that serializes an object of Flink types into JSON bytes. + * + *

Serializes the input Flink object into a JSON string and + * converts it into byte[]. + * + *

Result byte[] messages can be deserialized using {@link JsonRowDeserializationSchema}. + */ +@PublicEvolving +public class CustomerJsonRowSerializationSchema implements SerializationSchema { + + private static final long serialVersionUID = -2885556750743978636L; + + /** Type information describing the input type. */ + private final TypeInformation typeInfo; + + /** Object mapper that is used to create output JSON objects. */ + private final ObjectMapper mapper = new ObjectMapper(); + + /** Formatter for RFC 3339-compliant string representation of a time value (with UTC timezone, without milliseconds). */ + private SimpleDateFormat timeFormat = new SimpleDateFormat("HH:mm:ss'Z'"); + + /** Formatter for RFC 3339-compliant string representation of a time value (with UTC timezone). */ + private SimpleDateFormat timeFormatWithMillis = new SimpleDateFormat("HH:mm:ss.SSS'Z'"); + + /** Formatter for RFC 3339-compliant string representation of a timestamp value (with UTC timezone). */ + private SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"); + + /** Reusable object node. */ + private transient ObjectNode node; + + private Counter counter; + + /** + * Creates a JSON serialization schema for the given type information. + * + * @param typeInfo The field names of {@link Row} are used to map to JSON properties. + */ + public CustomerJsonRowSerializationSchema(TypeInformation typeInfo) { + Preconditions.checkNotNull(typeInfo, "Type information"); + this.typeInfo = typeInfo; + } + + /** + * Creates a JSON serialization schema for the given JSON schema. 
+ * + * @param jsonSchema JSON schema describing the result type + * + * @see http://json-schema.org/ + */ + public CustomerJsonRowSerializationSchema(String jsonSchema) { + this(JsonRowSchemaConverter.convert(jsonSchema)); + } + + @Override + public byte[] serialize(Row row) { + if (node == null) { + node = mapper.createObjectNode(); + } + + try { + convertRow(node, (RowTypeInfo) typeInfo, row); + counter.inc(); + return mapper.writeValueAsBytes(node); + } catch (Throwable t) { + throw new RuntimeException("Could not serialize row '" + row + "'. " + + "Make sure that the schema matches the input.", t); + } + } + + // -------------------------------------------------------------------------------------------- + + private ObjectNode convertRow(ObjectNode reuse, RowTypeInfo info, Row row) { + if (reuse == null) { + reuse = mapper.createObjectNode(); + } + final String[] fieldNames = info.getFieldNames(); + final TypeInformation[] fieldTypes = info.getFieldTypes(); + + // validate the row + if (row.getArity() != fieldNames.length) { + throw new IllegalStateException(String.format( + "Number of elements in the row '%s' is different from number of field names: %d", row, fieldNames.length)); + } + + for (int i = 0; i < fieldNames.length; i++) { + final String name = fieldNames[i]; + + final JsonNode fieldConverted = convert(reuse, reuse.get(name), fieldTypes[i], row.getField(i)); + reuse.set(name, fieldConverted); + } + + return reuse; + } + + private JsonNode convert(ContainerNode container, JsonNode reuse, TypeInformation info, Object object) { + if (info == Types.VOID || object == null) { + return container.nullNode(); + } else if (info == Types.BOOLEAN) { + return container.booleanNode((Boolean) object); + } else if (info == Types.STRING) { + return container.textNode((String) object); + } else if (info == Types.BIG_DEC) { + // convert decimal if necessary + if (object instanceof BigDecimal) { + return container.numberNode((BigDecimal) object); + } + return 
container.numberNode(BigDecimal.valueOf(((Number) object).doubleValue())); + } else if (info == Types.BIG_INT) { + // convert integer if necessary + if (object instanceof BigInteger) { + return container.numberNode((BigInteger) object); + } + return container.numberNode(BigInteger.valueOf(((Number) object).longValue())); + } else if (info == Types.SQL_DATE) { + return container.textNode(object.toString()); + } else if (info == Types.SQL_TIME) { + final Time time = (Time) object; + // strip milliseconds if possible + if (time.getTime() % 1000 > 0) { + return container.textNode(timeFormatWithMillis.format(time)); + } + return container.textNode(timeFormat.format(time)); + } else if (info == Types.SQL_TIMESTAMP) { + return container.textNode(timestampFormat.format((Timestamp) object)); + } else if (info instanceof RowTypeInfo) { + if (reuse != null && reuse instanceof ObjectNode) { + return convertRow((ObjectNode) reuse, (RowTypeInfo) info, (Row) object); + } else { + return convertRow(null, (RowTypeInfo) info, (Row) object); + } + } else if (info instanceof ObjectArrayTypeInfo) { + if (reuse != null && reuse instanceof ArrayNode) { + return convertObjectArray((ArrayNode) reuse, ((ObjectArrayTypeInfo) info).getComponentInfo(), (Object[]) object); + } else { + return convertObjectArray(null, ((ObjectArrayTypeInfo) info).getComponentInfo(), (Object[]) object); + } + } else if (info instanceof BasicArrayTypeInfo) { + if (reuse != null && reuse instanceof ArrayNode) { + return convertObjectArray((ArrayNode) reuse, ((BasicArrayTypeInfo) info).getComponentInfo(), (Object[]) object); + } else { + return convertObjectArray(null, ((BasicArrayTypeInfo) info).getComponentInfo(), (Object[]) object); + } + } else if (info instanceof PrimitiveArrayTypeInfo && ((PrimitiveArrayTypeInfo) info).getComponentType() == Types.BYTE) { + return container.binaryNode((byte[]) object); + } else { + // for types that were specified without JSON schema + // e.g. 
POJOs + try { + return mapper.valueToTree(object); + } catch (IllegalArgumentException e) { + throw new IllegalStateException("Unsupported type information '" + info + "' for object: " + object, e); + } + } + } + + private ArrayNode convertObjectArray(ArrayNode reuse, TypeInformation info, Object[] array) { + if (reuse == null) { + reuse = mapper.createArrayNode(); + } else { + reuse.removeAll(); + } + + for (Object object : array) { + reuse.add(convert(reuse, null, info, object)); + } + return reuse; + } + + public Counter getCounter() { + return counter; + } + + public void setCounter(Counter counter) { + this.counter = counter; + } +} diff --git a/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafkaJsonTableSink.java b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafkaJsonTableSink.java new file mode 100644 index 000000000..af6e54854 --- /dev/null +++ b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafkaJsonTableSink.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka; + +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.functions.sink.SinkFunction; +import org.apache.flink.streaming.connectors.kafka.KafkaTableSink; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.utils.TableConnectorUtils; +import org.apache.flink.types.Row; + +import java.util.Optional; +import java.util.Properties; + +/** + * @author: chuixue + * @create: 2019-11-05 11:54 + * @description: + **/ +public class CustomerKafkaJsonTableSink extends KafkaTableSink { + + protected SerializationSchema schema; + + + public CustomerKafkaJsonTableSink(TableSchema schema, + String topic, + Properties properties, + Optional> partitioner, + SerializationSchema serializationSchema) { + + super(schema, topic, properties, partitioner, serializationSchema); + this.schema = serializationSchema; + } + + @Override + protected SinkFunction createKafkaProducer(String topic, Properties properties, SerializationSchema serializationSchema, Optional> optional) { + return new CustomerFlinkKafkaProducer(topic, serializationSchema, properties); + } + + @Override + public void emitDataStream(DataStream dataStream) { + SinkFunction kafkaProducer = createKafkaProducer(topic, properties, schema, partitioner); + // always enable flush on checkpoint to achieve at-least-once if query runs with checkpointing enabled. 
+ //kafkaProducer.setFlushOnCheckpoint(true); + dataStream.addSink(kafkaProducer).name(TableConnectorUtils.generateRuntimeName(this.getClass(), getFieldNames())); + } +} diff --git a/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java new file mode 100644 index 000000000..44bf9f98b --- /dev/null +++ b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -0,0 +1,144 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka; + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.kafka.table.KafkaSinkTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.serialization.SerializationSchema; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TupleTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.connectors.kafka.KafkaTableSinkBase; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkFixedPartitioner; +import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; +import org.apache.flink.table.api.TableSchema; +import org.apache.flink.table.sinks.RetractStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +import java.util.Optional; +import java.util.Properties; + +/** + * @author: chuixue + * @create: 2019-11-05 11:45 + * @description: + **/ +public class KafkaSink implements RetractStreamTableSink, IStreamSinkGener { + + protected String[] fieldNames; + + protected TypeInformation[] fieldTypes; + + protected String topic; + + protected int parallelism; + + protected Properties properties; + + /** Serialization schema for encoding records to Kafka. */ + protected SerializationSchema serializationSchema; + + /** The schema of the table. */ + private TableSchema schema; + + /** Partitioner to select Kafka partition for each item. 
*/ + protected Optional> partitioner; + + @Override + public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { + KafkaSinkTableInfo kafkaSinkTableInfo = (KafkaSinkTableInfo) targetTableInfo; + this.topic = kafkaSinkTableInfo.getTopic(); + + properties = new Properties(); + properties.setProperty("bootstrap.servers", kafkaSinkTableInfo.getBootstrapServers()); + + for (String key : kafkaSinkTableInfo.getKafkaParamKeys()) { + properties.setProperty(key, kafkaSinkTableInfo.getKafkaParam(key)); + } + this.partitioner = Optional.of(new FlinkFixedPartitioner<>()); + this.fieldNames = kafkaSinkTableInfo.getFields(); + TypeInformation[] types = new TypeInformation[kafkaSinkTableInfo.getFields().length]; + for (int i = 0; i < kafkaSinkTableInfo.getFieldClasses().length; i++) { + types[i] = TypeInformation.of(kafkaSinkTableInfo.getFieldClasses()[i]); + } + this.fieldTypes = types; + + TableSchema.Builder schemaBuilder = TableSchema.builder(); + for (int i=0;i getRecordType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + @Override + public void emitDataStream(DataStream> dataStream) { + KafkaTableSinkBase kafkaTableSink = new CustomerKafkaJsonTableSink( + schema, + topic, + properties, + partitioner, + serializationSchema + ); + + DataStream ds = dataStream.map((Tuple2 record) -> { + return record.f1; + }).returns(getOutputType().getTypeAt(1)).setParallelism(parallelism); + + kafkaTableSink.emitDataStream(ds); + } + + @Override + public TupleTypeInfo> getOutputType() { + return new TupleTypeInfo(org.apache.flink.table.api.Types.BOOLEAN(), new RowTypeInfo(fieldTypes, fieldNames)); + } + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + @Override + public TableSink> configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } +} diff --git 
a/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java new file mode 100644 index 000000000..f633c8112 --- /dev/null +++ b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +/** + * @author: chuixue + * @create: 2019-11-05 11:46 + * @description: + **/ +public class KafkaSinkParser extends AbsTableParser { + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) throws Exception { + KafkaSinkTableInfo kafkaSinkTableInfo = new KafkaSinkTableInfo(); + kafkaSinkTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafkaSinkTableInfo); + kafkaSinkTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSinkTableInfo.PARALLELISM_KEY.toLowerCase()))); + + if (props.get(KafkaSinkTableInfo.SINK_DATA_TYPE) != null) { + kafkaSinkTableInfo.setSinkDataType(props.get(KafkaSinkTableInfo.SINK_DATA_TYPE).toString()); + } + +// if (props.get(KafkaSinkTableInfo.FIELD_DELINITER) != null) { +// kafka11SinkTableInfo.setFieldDelimiter(props.get(KafkaSinkTableInfo.FIELD_DELINITER).toString()); +// } + + kafkaSinkTableInfo.setBootstrapServers(MathUtil.getString(props.get(KafkaSinkTableInfo.BOOTSTRAPSERVERS_KEY.toLowerCase()))); + kafkaSinkTableInfo.setTopic(MathUtil.getString(props.get(KafkaSinkTableInfo.TOPIC_KEY.toLowerCase()))); + + Integer parallelism = MathUtil.getIntegerVal(props.get(KafkaSinkTableInfo.PARALLELISM_KEY.toLowerCase())); + kafkaSinkTableInfo.setParallelism(parallelism); + + for (String key : props.keySet()) { + if (!key.isEmpty() && key.startsWith("kafka.")) { + kafkaSinkTableInfo.addKafkaParam(key.substring(6), props.get(key).toString()); + } + } + kafkaSinkTableInfo.check(); + + return kafkaSinkTableInfo; + } +} diff --git a/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java new file mode 100644 index 000000000..1d23932c1 --- 
/dev/null +++ b/kafka/kafka-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkTableInfo.java @@ -0,0 +1,93 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.sink.kafka.table; + +import com.dtstack.flink.sql.table.TargetTableInfo; +import com.google.common.base.Preconditions; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +/** + * @author: chuixue + * @create: 2019-11-05 11:46 + * @description: + **/ +public class KafkaSinkTableInfo extends TargetTableInfo { + //version + private static final String CURR_TYPE = "kafka"; + + public KafkaSinkTableInfo(){ + super.setType(CURR_TYPE); + } + public static final String BOOTSTRAPSERVERS_KEY = "bootstrapServers"; + + public static final String TOPIC_KEY = "topic"; + + private String bootstrapServers; + + public Map kafkaParam = new HashMap(); + + private String topic; + + + public void addKafkaParam(String key,String value){ + kafkaParam.put(key,value); + } + + public String getKafkaParam(String key){ + return kafkaParam.get(key); + } + + public Set getKafkaParamKeys(){ + return kafkaParam.keySet(); + } + + + public String getBootstrapServers() { + return bootstrapServers; + } + + public void 
setBootstrapServers(String bootstrapServers) { + this.bootstrapServers = bootstrapServers; + } + + public String getTopic() { + return topic; + } + + public void setTopic(String topic) { + this.topic = topic; + } + + + @Override + public boolean check() { + Preconditions.checkNotNull(bootstrapServers, "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(topic, "kafka of topic is required"); + //Preconditions.checkNotNull(kafkaParam.get("groupId"), "kafka of groupId is required"); + return false; + } + + @Override + public String getType() { + return super.getType(); + } +} diff --git a/kafka/kafka-source/pom.xml b/kafka/kafka-source/pom.xml new file mode 100644 index 000000000..55ca950fb --- /dev/null +++ b/kafka/kafka-source/pom.xml @@ -0,0 +1,81 @@ + + + + sql.kafka + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.source.kafka + jar + + kafka-source + http://maven.apache.org + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java new file mode 100644 index 000000000..6d3e57957 --- /dev/null +++ b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerJsonDeserialization.java @@ -0,0 +1,280 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import com.dtstack.flink.sql.source.kafka.metric.KafkaTopicPartitionLagMetric; +import com.dtstack.flink.sql.table.TableInfo; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeinfo.Types; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Strings; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.JsonNodeType; +import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.TextNode; +import org.apache.flink.streaming.connectors.kafka.internal.KafkaConsumerThread; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.types.Row; +import org.apache.kafka.clients.consumer.KafkaConsumer; +import 
org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.lang.reflect.Field; +import java.sql.Date; +import java.sql.Time; +import java.sql.Timestamp; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static com.dtstack.flink.sql.metric.MetricConstant.*; + +/** + * @author: chuixue + * @create: 2019-11-05 10:57 + * @description: + **/ +public class CustomerJsonDeserialization extends AbsDeserialization { + + private static final Logger LOG = LoggerFactory.getLogger(CustomerJsonDeserialization.class); + + private static final long serialVersionUID = 2385115520960444192L; + + private static int dirtyDataFrequency = 1000; + + private final ObjectMapper objectMapper = new ObjectMapper(); + + /** Type information describing the result type. */ + private final TypeInformation typeInfo; + + /** Field names to parse. Indices match fieldTypes indices. */ + private final String[] fieldNames; + + /** Types to parse fields as. Indices match fieldNames indices. 
*/ + private final TypeInformation[] fieldTypes; + + private AbstractFetcher fetcher; + + private boolean firstMsg = true; + + private Map nodeAndJsonNodeMapping = Maps.newHashMap(); + + private Map rowAndFieldMapping; + + private List fieldExtraInfos; + + public CustomerJsonDeserialization(TypeInformation typeInfo, Map rowAndFieldMapping, List fieldExtraInfos){ + this.typeInfo = typeInfo; + this.fieldNames = ((RowTypeInfo) typeInfo).getFieldNames(); + this.fieldTypes = ((RowTypeInfo) typeInfo).getFieldTypes(); + this.rowAndFieldMapping= rowAndFieldMapping; + this.fieldExtraInfos = fieldExtraInfos; + } + + @Override + public Row deserialize(byte[] message) throws IOException { + + if(firstMsg){ + try { + registerPtMetric(fetcher); + } catch (Exception e) { + LOG.error("register topic partition metric error.", e); + } + + firstMsg = false; + } + + try { + JsonNode root = objectMapper.readTree(message); + + if (numInRecord.getCount() % dirtyDataFrequency == 0) { + LOG.info(root.toString()); + } + + numInRecord.inc(); + numInBytes.inc(message.length); + + parseTree(root, null); + Row row = new Row(fieldNames.length); + + for (int i = 0; i < fieldNames.length; i++) { + JsonNode node = getIgnoreCase(fieldNames[i]); + TableInfo.FieldExtraInfo fieldExtraInfo = fieldExtraInfos.get(i); + + if (node == null) { + if (fieldExtraInfo != null && fieldExtraInfo.getNotNull()) { + throw new IllegalStateException("Failed to find field with name '" + + fieldNames[i] + "'."); + } else { + row.setField(i, null); + } + } else { + // Read the value as specified type + + Object value = convert(node, fieldTypes[i]); + row.setField(i, value); + } + } + + numInResolveRecord.inc(); + return row; + } catch (Exception e) { + //add metric of dirty data + if (dirtyDataCounter.getCount() % dirtyDataFrequency == 0) { + LOG.info("dirtyData: " + new String(message)); + LOG.error("" , e); + } + dirtyDataCounter.inc(); + return null; + }finally { + nodeAndJsonNodeMapping.clear(); + } + } + + public 
JsonNode getIgnoreCase(String key) { + String nodeMappingKey = rowAndFieldMapping.getOrDefault(key, key); + return nodeAndJsonNodeMapping.get(nodeMappingKey); + } + + private void parseTree(JsonNode jsonNode, String prefix){ + if (jsonNode.isArray()) { + ArrayNode array = (ArrayNode) jsonNode; + for (int i = 0; i < array.size(); i++) { + JsonNode child = array.get(i); + String nodeKey = getNodeKey(prefix, i); + + if (child.isValueNode()) { + nodeAndJsonNodeMapping.put(nodeKey, child); + } else { + if (rowAndFieldMapping.containsValue(nodeKey)) { + nodeAndJsonNodeMapping.put(nodeKey, child); + } + parseTree(child, nodeKey); + } + } + return; + } + + Iterator iterator = jsonNode.fieldNames(); + while (iterator.hasNext()){ + String next = iterator.next(); + JsonNode child = jsonNode.get(next); + String nodeKey = getNodeKey(prefix, next); + + if (child.isValueNode()){ + nodeAndJsonNodeMapping.put(nodeKey, child); + }else if(child.isArray()){ + parseTree(child, nodeKey); + }else { + parseTree(child, nodeKey); + } + } + } + + private String getNodeKey(String prefix, String nodeName){ + if(Strings.isNullOrEmpty(prefix)){ + return nodeName; + } + + return prefix + "." 
+ nodeName; + } + + private String getNodeKey(String prefix, int i) { + if (Strings.isNullOrEmpty(prefix)) { + return "[" + i + "]"; + } + return prefix + "[" + i + "]"; + } + + public void setFetcher(AbstractFetcher fetcher) { + this.fetcher = fetcher; + } + + protected void registerPtMetric(AbstractFetcher fetcher) throws Exception { + + Field consumerThreadField = fetcher.getClass().getSuperclass().getDeclaredField("consumerThread"); + consumerThreadField.setAccessible(true); + KafkaConsumerThread consumerThread = (KafkaConsumerThread) consumerThreadField.get(fetcher); + + Field hasAssignedPartitionsField = consumerThread.getClass().getDeclaredField("hasAssignedPartitions"); + hasAssignedPartitionsField.setAccessible(true); + + //wait until assignedPartitions + + boolean hasAssignedPartitions = (boolean) hasAssignedPartitionsField.get(consumerThread); + + if(!hasAssignedPartitions){ + throw new RuntimeException("wait 50 secs, but not assignedPartitions"); + } + + Field consumerField = consumerThread.getClass().getDeclaredField("consumer"); + consumerField.setAccessible(true); + + KafkaConsumer kafkaConsumer = (KafkaConsumer) consumerField.get(consumerThread); + Field subscriptionStateField = kafkaConsumer.getClass().getDeclaredField("subscriptions"); + subscriptionStateField.setAccessible(true); + + //topic partitions lag + SubscriptionState subscriptionState = (SubscriptionState) subscriptionStateField.get(kafkaConsumer); + Set assignedPartitions = subscriptionState.assignedPartitions(); + for(TopicPartition topicPartition : assignedPartitions){ + MetricGroup metricGroup = getRuntimeContext().getMetricGroup().addGroup(DT_TOPIC_GROUP, topicPartition.topic()) + .addGroup(DT_PARTITION_GROUP, topicPartition.partition() + ""); + metricGroup.gauge(DT_TOPIC_PARTITION_LAG_GAUGE, new KafkaTopicPartitionLagMetric(subscriptionState, topicPartition)); + } + + } + + private static String partitionLagMetricName(TopicPartition tp) { + return tp + ".records-lag"; + } + + 
private Object convert(JsonNode node, TypeInformation info) { + if (info.getTypeClass().equals(Types.BOOLEAN.getTypeClass())) { + return node.asBoolean(); + } else if (info.getTypeClass().equals(Types.STRING.getTypeClass())) { + return node.asText(); + } else if (info.getTypeClass().equals(Types.SQL_DATE.getTypeClass())) { + return Date.valueOf(node.asText()); + } else if (info.getTypeClass().equals(Types.SQL_TIME.getTypeClass())) { + // local zone + return Time.valueOf(node.asText()); + } else if (info.getTypeClass().equals(Types.SQL_TIMESTAMP.getTypeClass())) { + // local zone + return Timestamp.valueOf(node.asText()); + } else { + // for types that were specified without JSON schema + // e.g. POJOs + try { + return objectMapper.treeToValue(node, info.getTypeClass()); + } catch (JsonProcessingException e) { + throw new IllegalStateException("Unsupported type information '" + info + "' for node: " + node); + } + } + } +} diff --git a/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafkaConsumer.java b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafkaConsumer.java new file mode 100644 index 000000000..7d35a35b3 --- /dev/null +++ b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/CustomerKafkaConsumer.java @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka; + +import com.dtstack.flink.sql.source.AbsDeserialization; +import org.apache.flink.metrics.MetricGroup; +import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; +import org.apache.flink.streaming.api.functions.AssignerWithPunctuatedWatermarks; +import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; +import org.apache.flink.streaming.connectors.kafka.config.OffsetCommitMode; +import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.types.Row; +import org.apache.flink.util.SerializedValue; + +import java.util.Arrays; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * @author: chuixue + * @create: 2019-11-05 10:58 + * @description: + **/ +public class CustomerKafkaConsumer extends FlinkKafkaConsumer { + + private static final long serialVersionUID = -2265366268827807739L; + + private CustomerJsonDeserialization customerJsonDeserialization; + + public CustomerKafkaConsumer(String topic, AbsDeserialization valueDeserializer, Properties props) { + super(Arrays.asList(topic.split(",")), valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + public CustomerKafkaConsumer(Pattern subscriptionPattern, AbsDeserialization valueDeserializer, Properties props) { + 
super(subscriptionPattern, valueDeserializer, props); + this.customerJsonDeserialization = (CustomerJsonDeserialization) valueDeserializer; + } + + @Override + public void run(SourceContext sourceContext) throws Exception { + customerJsonDeserialization.setRuntimeContext(getRuntimeContext()); + customerJsonDeserialization.initMetric(); + super.run(sourceContext); + } + + @Override + protected AbstractFetcher createFetcher(SourceContext sourceContext, Map assignedPartitionsWithInitialOffsets, SerializedValue> watermarksPeriodic, SerializedValue> watermarksPunctuated, StreamingRuntimeContext runtimeContext, OffsetCommitMode offsetCommitMode, MetricGroup consumerMetricGroup, boolean useMetrics) throws Exception { + AbstractFetcher fetcher = super.createFetcher(sourceContext, assignedPartitionsWithInitialOffsets, watermarksPeriodic, watermarksPunctuated, runtimeContext, offsetCommitMode, consumerMetricGroup, useMetrics); + customerJsonDeserialization.setFetcher(fetcher); + return fetcher; + } +} diff --git a/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java new file mode 100644 index 000000000..c26d99b62 --- /dev/null +++ b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka; + +import com.dtstack.flink.sql.source.IStreamSourceGener; +import com.dtstack.flink.sql.source.kafka.table.KafkaSourceTableInfo; +import com.dtstack.flink.sql.table.SourceTableInfo; +import com.dtstack.flink.sql.util.DtStringUtil; +import com.dtstack.flink.sql.util.PluginUtil; +import org.apache.commons.lang3.BooleanUtils; +import org.apache.commons.lang3.StringUtils; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; +import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; +import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer; +import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; +import org.apache.flink.table.api.Table; +import org.apache.flink.table.api.java.StreamTableEnvironment; +import org.apache.flink.types.Row; + +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; +import java.util.regex.Pattern; + +/** + * @author: chuixue + * @create: 2019-11-05 10:55 + * @description: Builds a Flink Table on top of a Kafka topic (or topic pattern) described by a KafkaSourceTableInfo. + **/ +public class KafkaSource implements IStreamSourceGener { + + private static final String SOURCE_OPERATOR_NAME_TPL = "${topic}_${table}"; + + /** + * Creates the Kafka consumer described by the table definition, adds it to env as a source and returns it as a Table via tableEnv. + * Start position follows offsetReset: "earliest", a JSON map of partition to offset, or the latest offset for any other value; a JSON value that cannot be converted raises a RuntimeException. + * + * @param sourceTableInfo table definition; it is cast to KafkaSourceTableInfo without a type check + * @return the Table created from the Kafka stream, using the table's field names joined by "," as the field expression + */ + @SuppressWarnings("rawtypes") + @Override +
public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnvironment env, StreamTableEnvironment tableEnv) { + + KafkaSourceTableInfo kafkaSourceTableInfo = (KafkaSourceTableInfo) sourceTableInfo; + String topicName = kafkaSourceTableInfo.getTopic(); + + Properties props = new Properties(); + props.setProperty("bootstrap.servers", kafkaSourceTableInfo.getBootstrapServers()); + if (DtStringUtil.isJosn(kafkaSourceTableInfo.getOffsetReset())) { + props.setProperty("auto.offset.reset", "none"); + } else { + props.setProperty("auto.offset.reset", kafkaSourceTableInfo.getOffsetReset()); + } + if (StringUtils.isNotBlank(kafkaSourceTableInfo.getGroupId())) { + props.setProperty("group.id", kafkaSourceTableInfo.getGroupId()); + } + + TypeInformation[] types = new TypeInformation[kafkaSourceTableInfo.getFields().length]; + for (int i = 0; i < kafkaSourceTableInfo.getFieldClasses().length; i++) { + types[i] = TypeInformation.of(kafkaSourceTableInfo.getFieldClasses()[i]); + } + + TypeInformation typeInformation = new RowTypeInfo(types, kafkaSourceTableInfo.getFields()); + + FlinkKafkaConsumer kafkaSrc; + if (BooleanUtils.isTrue(kafkaSourceTableInfo.getTopicIsPattern())) { + kafkaSrc = new CustomerKafkaConsumer(Pattern.compile(topicName), + new CustomerJsonDeserialization(typeInformation, kafkaSourceTableInfo.getPhysicalFields(), kafkaSourceTableInfo.getFieldExtraInfoList()), props); + } else { + kafkaSrc = new CustomerKafkaConsumer(topicName, + new CustomerJsonDeserialization(typeInformation, kafkaSourceTableInfo.getPhysicalFields(), kafkaSourceTableInfo.getFieldExtraInfoList()), props); + } + + // start position: earliest, explicit per-partition offsets given as JSON, otherwise latest + if ("earliest".equalsIgnoreCase(kafkaSourceTableInfo.getOffsetReset())) { + kafkaSrc.setStartFromEarliest(); + } else if (DtStringUtil.isJosn(kafkaSourceTableInfo.getOffsetReset())) {// {"0":12312,"1":12321,"2":12312} + try { + Properties properties = PluginUtil.jsonStrToObject(kafkaSourceTableInfo.getOffsetReset(), Properties.class); + Map<String, Object>
offsetMap = PluginUtil.ObjectToMap(properties); + Map<KafkaTopicPartition, Long> specificStartupOffsets = new HashMap<>(); + for (Map.Entry<String, Object> entry : offsetMap.entrySet()) { + specificStartupOffsets.put(new KafkaTopicPartition(topicName, Integer.valueOf(entry.getKey())), Long.valueOf(entry.getValue().toString())); + } + kafkaSrc.setStartFromSpecificOffsets(specificStartupOffsets); + } catch (Exception e) { + throw new RuntimeException("not support offsetReset type:" + kafkaSourceTableInfo.getOffsetReset(), e); + } + } else { + kafkaSrc.setStartFromLatest(); + } + + String fields = StringUtils.join(kafkaSourceTableInfo.getFields(), ","); + String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); + + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafkaSourceTableInfo.getParallelism(); + if (parallelism != null) { + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); + } +} diff --git a/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/metric/KafkaTopicPartitionLagMetric.java b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/metric/KafkaTopicPartitionLagMetric.java new file mode 100644 index 000000000..7810056d7 --- /dev/null +++ b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/metric/KafkaTopicPartitionLagMetric.java @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.metric; + +import org.apache.flink.metrics.Gauge; +import org.apache.kafka.clients.consumer.internals.SubscriptionState; +import org.apache.kafka.common.TopicPartition; +import org.apache.kafka.common.requests.IsolationLevel; + +/** + * @author: chuixue + * @create: 2019-11-05 11:09 + * @description: Gauge that reports the lag of one topic partition, read from the consumer's SubscriptionState with READ_UNCOMMITTED isolation on every getValue() call. + **/ +public class KafkaTopicPartitionLagMetric implements Gauge<Long> { + + private final SubscriptionState subscriptionState; + + private final TopicPartition tp; + + public KafkaTopicPartitionLagMetric(SubscriptionState subscriptionState, TopicPartition tp){ + this.subscriptionState = subscriptionState; + this.tp = tp; + } + + @Override + public Long getValue() { + return subscriptionState.partitionLag(tp, IsolationLevel.READ_UNCOMMITTED); + } +} diff --git a/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java new file mode 100644 index 000000000..a99f49298 --- /dev/null +++ b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.table; + +import com.dtstack.flink.sql.table.AbsSourceParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.ClassUtil; +import com.dtstack.flink.sql.util.MathUtil; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * @author: chuixue + * @create: 2019-11-05 11:08 + * @description: Parses the field list and WITH-properties of a Kafka source table into a KafkaSourceTableInfo, including "physicalField type AS alias [NOT NULL]" columns. + **/ +public class KafkaSourceParser extends AbsSourceParser { + + private static final Logger LOG = LoggerFactory.getLogger(KafkaSourceParser.class); + + private static final String KAFKA_NEST_FIELD_KEY = "nestFieldKey"; + + private static final Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\S+\\.)*\\S+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); + + static { + keyPatternMap.put(KAFKA_NEST_FIELD_KEY, kafkaNestFieldKeyPattern); + + keyHandlerMap.put(KAFKA_NEST_FIELD_KEY, KafkaSourceParser::dealNestField); + } + + /** + * Adds one aliased column to the table: group 1 is the physical field path, group 3 the type name, group 4 the alias, and group 5 is non-null only when NOT NULL was written. + * + * @param matcher matcher whose groups 1, 3, 4 and 5 are read directly, so it must already hold a match + * @param tableInfo table definition that receives the alias-to-physical mapping, field name, class, type and extra info + */ + static void dealNestField(Matcher matcher, TableInfo tableInfo) { + String physicalField = matcher.group(1); + String fieldType = matcher.group(3); + String mappingField = matcher.group(4); + Class fieldClass = ClassUtil.stringConvertClass(fieldType); + boolean notNull = matcher.group(5) != null; + TableInfo.FieldExtraInfo fieldExtraInfo = new TableInfo.FieldExtraInfo(); + fieldExtraInfo.setNotNull(notNull); + + tableInfo.addPhysicalMappings(mappingField, physicalField); +
tableInfo.addField(mappingField); + tableInfo.addFieldClass(fieldClass); + tableInfo.addFieldType(fieldType); + tableInfo.addFieldExtraInfo(fieldExtraInfo); + if (LOG.isInfoEnabled()) { + LOG.info(physicalField + "--->" + mappingField + " Class: " + fieldClass.toString()); + } + } + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) throws Exception { // property keys are looked up in lower case; a blank bootstrapServers is rejected + KafkaSourceTableInfo kafkaSourceTableInfo = new KafkaSourceTableInfo(); + kafkaSourceTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kafkaSourceTableInfo); + + kafkaSourceTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(KafkaSourceTableInfo.PARALLELISM_KEY.toLowerCase()))); + String bootstrapServer = MathUtil.getString(props.get(KafkaSourceTableInfo.BOOTSTRAPSERVERS_KEY.toLowerCase())); + if (bootstrapServer == null || bootstrapServer.trim().equals("")) { + throw new Exception("BootstrapServers can not be empty!"); + } else { + kafkaSourceTableInfo.setBootstrapServers(bootstrapServer); + } + kafkaSourceTableInfo.setGroupId(MathUtil.getString(props.get(KafkaSourceTableInfo.GROUPID_KEY.toLowerCase()))); + kafkaSourceTableInfo.setTopic(MathUtil.getString(props.get(KafkaSourceTableInfo.TOPIC_KEY.toLowerCase()))); + kafkaSourceTableInfo.setOffsetReset(MathUtil.getString(props.get(KafkaSourceTableInfo.OFFSETRESET_KEY.toLowerCase()))); + kafkaSourceTableInfo.setTopicIsPattern(MathUtil.getBoolean(props.get(KafkaSourceTableInfo.TOPICISPATTERN_KEY.toLowerCase()))); + kafkaSourceTableInfo.setTimeZone(MathUtil.getString(props.get(KafkaSourceTableInfo.TIME_ZONE_KEY.toLowerCase()))); + kafkaSourceTableInfo.check(); + return kafkaSourceTableInfo; + } +} diff --git a/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java new file mode 100644 index 000000000..33b704ac0 --- /dev/null +++
b/kafka/kafka-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.source.kafka.table; + +import com.dtstack.flink.sql.table.SourceTableInfo; +import com.google.common.base.Preconditions; + +/** + * @author: chuixue + * @create: 2019-11-05 11:09 + * @description: Holds the settings of a Kafka source table: bootstrap servers, topic (or topic pattern), group id and start offset. + **/ +public class KafkaSourceTableInfo extends SourceTableInfo { + + // type name handed to setType() in the constructor + private static final String CURR_TYPE = "kafka"; + + public static final String BOOTSTRAPSERVERS_KEY = "bootstrapServers"; + + public static final String TOPIC_KEY = "topic"; + + public static final String GROUPID_KEY = "groupId"; + + public static final String OFFSETRESET_KEY = "offsetReset"; + + public static final String TOPICISPATTERN_KEY = "topicIsPattern"; + + private String bootstrapServers; + + private String topic; + + private String groupId; + + private Boolean topicIsPattern = false; + + public Boolean getTopicIsPattern() { + return topicIsPattern; + } + + public void setTopicIsPattern(Boolean topicIsPattern) { + if (topicIsPattern == null) return; + + this.topicIsPattern = topicIsPattern; + } + +
// "latest" (default), "earliest", or a JSON partition-to-offset map as handled by KafkaSource + private String offsetReset = "latest"; + + private String offset; + + public KafkaSourceTableInfo() { + super.setType(CURR_TYPE); + } + + + public String getBootstrapServers() { + return bootstrapServers; + } + + public void setBootstrapServers(String bootstrapServers) { + this.bootstrapServers = bootstrapServers; + } + + public String getTopic() { + return topic; + } + + public void setTopic(String topic) { + this.topic = topic; + } + + public String getGroupId() { + return groupId; + } + + public void setGroupId(String groupId) { + this.groupId = groupId; + } + + public String getOffsetReset() { + return offsetReset; + } + + public void setOffsetReset(String offsetReset) { + if (offsetReset == null) { + return; + } + + this.offsetReset = offsetReset; + } + + public String getOffset() { + return offset; + } + + public void setOffset(String offset) { + this.offset = offset; + } + + @Override + public boolean check() { + Preconditions.checkNotNull(bootstrapServers, "kafka of bootstrapServers is required"); + Preconditions.checkNotNull(topic, "kafka of topic is required"); + return false; // NOTE(review): false is returned even when both checks pass; KafkaSourceParser ignores the result - confirm the intended contract + } + + @Override + public String getType() { +// return super.getType() + SOURCE_SUFFIX; + return super.getType(); + } +} diff --git a/kafka/pom.xml b/kafka/pom.xml new file mode 100644 index 000000000..772671ff6 --- /dev/null +++ b/kafka/pom.xml @@ -0,0 +1,43 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + + 4.0.0 + + sql.kafka + pom + + + kafka-source + kafka-sink + + + + + org.apache.flink + flink-connector-kafka_2.11 + ${flink.version} + + + + junit + junit + 3.8.1 + test + + + + com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + + + \ No newline at end of file diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java index 0ca57e0b3..f9f4c897c
100644 --- a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerCsvDeserialization.java @@ -25,7 +25,7 @@ import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java index 900717b98..3f0c10737 100644 --- a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/deserialization/CustomerJsonDeserialization.java @@ -24,8 +24,8 @@ import com.dtstack.flink.sql.source.AbsDeserialization; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.connectors.kafka.internals.AbstractFetcher; import org.apache.flink.types.Row; import org.apache.kafka.common.TopicPartition; diff --git a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java 
b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index 9f7e36f56..93e9d52f7 100644 --- a/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka08/kafka08-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -21,7 +21,7 @@ package com.dtstack.flink.sql.source.kafka.table; import com.dtstack.flink.sql.table.SourceTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; import java.util.HashMap; import java.util.Map; diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer09.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer09.java index b026bf2c6..1660063c4 100644 --- a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer09.java +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer09.java @@ -23,17 +23,7 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.metrics.Counter; import org.apache.flink.metrics.MeterView; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer09; -import org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaDelegatePartitioner; -import org.apache.kafka.clients.producer.Callback; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.Metric; -import org.apache.kafka.common.MetricName; - -import java.util.Map; import java.util.Properties; /** diff --git 
a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java index 079d13a84..be9538464 100644 --- a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java @@ -27,11 +27,11 @@ import org.apache.flink.formats.json.JsonRowDeserializationSchema; import org.apache.flink.formats.json.JsonRowSchemaConverter; import org.apache.flink.metrics.Counter; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ContainerNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ContainerNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.flink.types.Row; import org.apache.flink.util.Preconditions; diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka09JsonTableSink.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka09JsonTableSink.java index 47ff5dab4..ffd44e9f7 100644 --- a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka09JsonTableSink.java +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka09JsonTableSink.java @@ -22,7 +22,6 @@ import 
org.apache.flink.streaming.api.functions.sink.SinkFunction; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducerBase; import org.apache.flink.streaming.connectors.kafka.Kafka09TableSink; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaDelegatePartitioner; import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.table.api.TableSchema; import org.apache.flink.table.utils.TableConnectorUtils; diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java index bf3f100e7..a6c3b98a4 100644 --- a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -64,6 +64,9 @@ public class KafkaSink implements RetractStreamTableSink, IStreamSinkGener< /** Partitioner to select Kafka partition for each item. 
*/ protected Optional> partitioner; + protected int parallelism; + + @Override public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { @@ -85,11 +88,16 @@ public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { this.fieldTypes = types; TableSchema.Builder schemaBuilder = TableSchema.builder(); - for (int i=0;i> dataStream) { DataStream ds = dataStream.map((Tuple2 record) -> { return record.f1; - }).returns(getOutputType().getTypeAt(1)); + }).returns(getOutputType().getTypeAt(1)).setParallelism(parallelism); kafkaTableSink.emitDataStream(ds); } diff --git a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java index 3e1f707e2..dd4d4450f 100644 --- a/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java +++ b/kafka09/kafka09-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -42,6 +42,10 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map iterator = jsonNode.fieldNames(); while (iterator.hasNext()){ String next = iterator.next(); @@ -186,7 +191,7 @@ private void parseTree(JsonNode jsonNode, String prefix){ if (child.isValueNode()){ nodeAndJsonNodeMapping.put(nodeKey, child); } else if(child.isArray()){ - nodeAndJsonNodeMapping.put(nodeKey, new TextNode(child.toString())); + parseTree(child, nodeKey); }else { parseTree(child, nodeKey); } @@ -201,6 +206,14 @@ private String getNodeKey(String prefix, String nodeName){ return prefix + "." 
+ nodeName; } + private String getNodeKey(String prefix, int i) { // key of array element i under prefix: prefix followed by "[i]" + String indexSuffix = "[" + i + "]"; + if (Strings.isNullOrEmpty(prefix)) { + return indexSuffix; + } + return prefix + indexSuffix; + } + public void setFetcher(AbstractFetcher fetcher) { this.fetcher = fetcher; } diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java index cb14f3af8..8de9ce703 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -29,6 +29,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer09; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; @@ -114,6 +115,12 @@ public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnv String fields = StringUtils.join(kafka09SourceTableInfo.getFields(), ","); String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); - return tableEnv.fromDataStream(env.addSource(kafkaSrc, sourceOperatorName, typeInformation), fields); + + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka09SourceTableInfo.getParallelism(); + if (parallelism != null) { + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); } } \ No newline at end of file diff --git
b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java index d147e4a19..106d2aeb9 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -44,7 +44,7 @@ public class KafkaSourceParser extends AbsSourceParser { private static final String KAFKA_NEST_FIELD_KEY = "nestFieldKey"; - private static Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\w+\\.)*\\w+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); + private static Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\S+\\.)*\\S+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); static { keyPatternMap.put(KAFKA_NEST_FIELD_KEY, kafkaNestFieldKeyPattern); diff --git a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index 9081b956b..e6098fb3c 100644 --- a/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka09/kafka09-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.source.kafka.table; import com.dtstack.flink.sql.table.SourceTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer010.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer010.java index 50da17099..2f11c355d 100644 --- a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer010.java +++ 
b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerFlinkKafkaProducer010.java @@ -23,15 +23,7 @@ import org.apache.flink.configuration.Configuration; import org.apache.flink.metrics.Counter; import org.apache.flink.metrics.MeterView; -import org.apache.flink.metrics.MetricGroup; -import org.apache.flink.streaming.api.operators.StreamingRuntimeContext; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducer010; -import org.apache.flink.streaming.connectors.kafka.internals.metrics.KafkaMetricWrapper; -import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaDelegatePartitioner; -import org.apache.kafka.clients.producer.Callback; -import org.apache.kafka.clients.producer.RecordMetadata; -import org.apache.kafka.common.Metric; -import org.apache.kafka.common.MetricName; import java.util.Map; import java.util.Properties; diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java index 7d1d45345..8033c851e 100644 --- a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java @@ -27,11 +27,11 @@ import org.apache.flink.formats.json.JsonRowDeserializationSchema; import org.apache.flink.formats.json.JsonRowSchemaConverter; import org.apache.flink.metrics.Counter; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ContainerNode; -import 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ContainerNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.flink.types.Row; import org.apache.flink.util.Preconditions; diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka10JsonTableSink.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka10JsonTableSink.java index 01762079a..d3edc2d86 100644 --- a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka10JsonTableSink.java +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerKafka10JsonTableSink.java @@ -19,13 +19,11 @@ import org.apache.flink.api.common.serialization.SerializationSchema; import org.apache.flink.streaming.api.datastream.DataStream; -import org.apache.flink.streaming.api.datastream.DataStreamSink; import org.apache.flink.streaming.api.functions.sink.SinkFunction; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaProducerBase; import org.apache.flink.streaming.connectors.kafka.Kafka010TableSink; import org.apache.flink.streaming.connectors.kafka.partitioner.FlinkKafkaPartitioner; import org.apache.flink.table.api.TableSchema; -import org.apache.flink.table.util.TableConnectorUtil; import org.apache.flink.table.utils.TableConnectorUtils; import org.apache.flink.types.Row; diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java index aa31727cf..8c757ce4a 100644 --- a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java +++ 
b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -58,6 +58,8 @@ public class KafkaSink implements RetractStreamTableSink, IStreamSinkGener< protected Properties properties; + protected int parallelism; + /** Serialization schema for encoding records to Kafka. */ protected SerializationSchema serializationSchema; @@ -87,12 +89,18 @@ public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { } this.fieldTypes = types; + TableSchema.Builder schemaBuilder = TableSchema.builder(); for (int i=0;i> dataStream) { DataStream ds = dataStream.map((Tuple2 record) -> { return record.f1; - }).returns(getOutputType().getTypeAt(1)); + }).returns(getOutputType().getTypeAt(1)).setParallelism(parallelism); kafkaTableSink.emitDataStream(ds); } diff --git a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java index 9f250fc03..073fe9111 100644 --- a/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java +++ b/kafka10/kafka10-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -42,6 +42,10 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map iterator = jsonNode.fieldNames(); while (iterator.hasNext()){ String next = iterator.next(); JsonNode child = jsonNode.get(next); String nodeKey = getNodeKey(prefix, next); - if (child.isValueNode()){ + if (child.isValueNode()) { nodeAndJsonNodeMapping.put(nodeKey, child); - }else if(child.isArray()){ - nodeAndJsonNodeMapping.put(nodeKey, new TextNode(child.toString())); - }else { + } else if(child.isArray()){ + parseTree(child, nodeKey); + } else { parseTree(child, nodeKey); } } @@ -202,6 +208,14 @@ private String getNodeKey(String prefix, String nodeName){ return prefix + "." 
+ nodeName; } + private String getNodeKey(String prefix, int i) { // key of array element i under prefix: prefix followed by "[i]" + String indexSuffix = "[" + i + "]"; + if (Strings.isNullOrEmpty(prefix)) { + return indexSuffix; + } + return prefix + indexSuffix; + } + public void setFetcher(AbstractFetcher fetcher) { this.fetcher = fetcher; } diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java index f875329e6..ab5ad8833 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -28,6 +28,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer010; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; @@ -116,6 +117,12 @@ public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnv String fields = StringUtils.join(kafka010SourceTableInfo.getFields(), ","); String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); - return tableEnv.fromDataStream(env.addSource(kafkaSrc, sourceOperatorName, typeInformation), fields); + + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka010SourceTableInfo.getParallelism(); + if (parallelism != null) { + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); } } \ No newline at end of file diff --git
a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java index 44f22497f..563110726 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -44,7 +44,7 @@ public class KafkaSourceParser extends AbsSourceParser { private static final String KAFKA_NEST_FIELD_KEY = "nestFieldKey"; - private static Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\w+\\.)*\\w+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); + private static Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\S+\\.)*\\S+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); static { keyPatternMap.put(KAFKA_NEST_FIELD_KEY, kafkaNestFieldKeyPattern); diff --git a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index 113159450..da78a2af6 100644 --- a/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka10/kafka10-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -21,7 +21,7 @@ package com.dtstack.flink.sql.source.kafka.table; import com.dtstack.flink.sql.table.SourceTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java index 4b77cac76..0d575a5fd 100644 --- 
a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/CustomerJsonRowSerializationSchema.java @@ -28,11 +28,11 @@ import org.apache.flink.formats.json.JsonRowDeserializationSchema; import org.apache.flink.formats.json.JsonRowSchemaConverter; import org.apache.flink.metrics.Counter; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ContainerNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.node.ArrayNode; +import com.fasterxml.jackson.databind.node.ContainerNode; +import com.fasterxml.jackson.databind.node.ObjectNode; import org.apache.flink.types.Row; import org.apache.flink.util.Preconditions; diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java index 90092e18a..5b0ac683f 100644 --- a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/KafkaSink.java @@ -56,6 +56,8 @@ public class KafkaSink implements RetractStreamTableSink, IStreamSinkGener protected String topic; + protected int parallelism; + protected Properties properties; /** Serialization schema for encoding records to Kafka. 
*/ @@ -88,10 +90,16 @@ public KafkaSink genStreamSink(TargetTableInfo targetTableInfo) { this.fieldTypes = types; TableSchema.Builder schemaBuilder = TableSchema.builder(); - for (int i=0;i> dataStream) { partitioner, serializationSchema ); + DataStream ds = dataStream.map((Tuple2 record) -> { return record.f1; - }).returns(getOutputType().getTypeAt(1)); + }).returns(getOutputType().getTypeAt(1)).setParallelism(parallelism); kafkaTableSink.emitDataStream(ds); } diff --git a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java index b9f84971a..ca4ffd5cf 100644 --- a/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java +++ b/kafka11/kafka11-sink/src/main/java/com/dtstack/flink/sql/sink/kafka/table/KafkaSinkParser.java @@ -50,6 +50,10 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map iterator = jsonNode.fieldNames(); while (iterator.hasNext()){ String next = iterator.next(); @@ -190,7 +195,7 @@ private void parseTree(JsonNode jsonNode, String prefix){ if (child.isValueNode()){ nodeAndJsonNodeMapping.put(nodeKey, child); }else if(child.isArray()){ - nodeAndJsonNodeMapping.put(nodeKey, new TextNode(child.toString())); + parseTree(child, nodeKey); }else { parseTree(child, nodeKey); } @@ -205,6 +210,14 @@ private String getNodeKey(String prefix, String nodeName){ return prefix + "." 
+ nodeName; } + private String getNodeKey(String prefix, int i) { + if (Strings.isNullOrEmpty(prefix)) { + return "[" + i + "]"; + } + + return prefix + "[" + i + "]"; + } + public void setFetcher(AbstractFetcher fetcher) { this.fetcher = fetcher; } diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java index 10a5615d5..8a552cc40 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/KafkaSource.java @@ -28,6 +28,7 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStreamSource; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer011; import org.apache.flink.streaming.connectors.kafka.internals.KafkaTopicPartition; @@ -116,6 +117,12 @@ public Table genStreamSource(SourceTableInfo sourceTableInfo, StreamExecutionEnv String fields = StringUtils.join(kafka011SourceTableInfo.getFields(), ","); String sourceOperatorName = SOURCE_OPERATOR_NAME_TPL.replace("${topic}", topicName).replace("${table}", sourceTableInfo.getName()); - return tableEnv.fromDataStream(env.addSource(kafkaSrc, sourceOperatorName, typeInformation), fields); + + DataStreamSource kafkaSource = env.addSource(kafkaSrc, sourceOperatorName, typeInformation); + Integer parallelism = kafka011SourceTableInfo.getParallelism(); + if (parallelism != null) { + kafkaSource.setParallelism(parallelism); + } + return tableEnv.fromDataStream(kafkaSource, fields); } } diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java 
b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java index 79686921f..07e12b0c0 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceParser.java @@ -44,7 +44,7 @@ public class KafkaSourceParser extends AbsSourceParser { private static final String KAFKA_NEST_FIELD_KEY = "nestFieldKey"; - private static Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\w+\\.)*\\w+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); + private static Pattern kafkaNestFieldKeyPattern = Pattern.compile("(?i)((@*\\S+\\.)*\\S+)\\s+(\\w+)\\s+AS\\s+(\\w+)(\\s+NOT\\s+NULL)?$"); static { keyPatternMap.put(KAFKA_NEST_FIELD_KEY, kafkaNestFieldKeyPattern); diff --git a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java index 4173063de..7a166695b 100644 --- a/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java +++ b/kafka11/kafka11-source/src/main/java/com/dtstack/flink/sql/source/kafka/table/KafkaSourceTableInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.source.kafka.table; import com.dtstack.flink.sql.table.SourceTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/kudu/kudu-side/kudu-all-side/pom.xml b/kudu/kudu-side/kudu-all-side/pom.xml new file mode 100644 index 000000000..5c35d4cd9 --- /dev/null +++ b/kudu/kudu-side/kudu-all-side/pom.xml @@ -0,0 +1,89 @@ + + + + sql.side.kudu + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.all.kudu + kudu-all-side + + jar + + + + com.dtstack.flink + sql.side.kudu.core + 1.0-SNAPSHOT + + + + + + + + 
org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kudu/kudu-side/kudu-all-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAllReqRow.java b/kudu/kudu-side/kudu-all-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAllReqRow.java new file mode 100644 index 000000000..681f5faab --- /dev/null +++ b/kudu/kudu-side/kudu-all-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAllReqRow.java @@ -0,0 +1,396 @@ +package com.dtstack.flink.sql.side.kudu; + +import com.dtstack.flink.sql.side.AllReqRow; +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.kudu.table.KuduSideTableInfo; +import org.apache.calcite.sql.JoinType; +import org.apache.commons.collections.CollectionUtils; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; +import org.apache.flink.types.Row; +import org.apache.flink.util.Collector; +import org.apache.flink.util.Preconditions; +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.SQLException; +import java.sql.Timestamp; +import java.util.*; +import java.util.concurrent.atomic.AtomicReference; + +public class KuduAllReqRow extends AllReqRow { + + private static final long serialVersionUID = 6051774809356082219L; + + private static final Logger LOG = 
LoggerFactory.getLogger(KuduAllReqRow.class); + /** + * 获取连接的尝试次数 + */ + private static final int CONN_RETRY_NUM = 3; + /** + * 缓存条数 + */ + private static final Long FETCH_SIZE = 1000L; + + private KuduClient client; + + private KuduTable table; + + + private AtomicReference>>> cacheRef = new AtomicReference<>(); + + public KuduAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(new KuduAllSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + } + + + @Override + public Row fillData(Row input, Object sideInput) { + Map cacheInfo = (Map) sideInput; + Row row = new Row(sideInfo.getOutFieldInfoList().size()); + for (Map.Entry entry : sideInfo.getInFieldIndex().entrySet()) { + Object obj = input.getField(entry.getValue()); + boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass()); + + //Type information for indicating event or processing time. However, it behaves like a regular SQL timestamp but is serialized as Long. 
+ if (obj instanceof Timestamp && isTimeIndicatorTypeInfo) { + obj = ((Timestamp) obj).getTime(); + } + row.setField(entry.getKey(), obj); + } + + for (Map.Entry entry : sideInfo.getSideFieldNameIndex().entrySet()) { + if (cacheInfo == null) { + row.setField(entry.getKey(), null); + } else { + row.setField(entry.getKey(), cacheInfo.get(entry.getValue())); + } + } + + return row; + } + + @Override + protected void initCache() throws SQLException { + Map>> newCache = Maps.newConcurrentMap(); + cacheRef.set(newCache); + loadData(newCache); + } + + + @Override + protected void reloadCache() { + //reload cacheRef and replace to old cacheRef + Map>> newCache = Maps.newConcurrentMap(); + loadData(newCache); + + cacheRef.set(newCache); + LOG.info("----- Mongo all cacheRef reload end:{}", Calendar.getInstance()); + } + + + @Override + public void flatMap(Row value, Collector out) throws Exception { + List inputParams = Lists.newArrayList(); + for (Integer conValIndex : sideInfo.getEqualValIndex()) { + Object equalObj = value.getField(conValIndex); + if (equalObj == null) { + out.collect(null); + } + inputParams.add(equalObj); + } + + String key = buildKey(inputParams); + List> cacheList = cacheRef.get().get(key); + if (CollectionUtils.isEmpty(cacheList)) { + if (sideInfo.getJoinType() == JoinType.LEFT) { + Row row = fillData(value, null); + out.collect(row); + } + return; + } + + for (Map one : cacheList) { + out.collect(fillData(value, one)); + } + } + + private void loadData(Map>> tmpCache) { + KuduSideTableInfo tableInfo = (KuduSideTableInfo) sideInfo.getSideTableInfo(); + KuduScanner scanner = null; + try { + for (int i = 0; i < CONN_RETRY_NUM; i++) { + try { + scanner = getConn(tableInfo); + break; + } catch (Exception e) { + if (i == CONN_RETRY_NUM - 1) { + throw new RuntimeException("", e); + } + try { + String connInfo = "kuduMasters:" + tableInfo.getKuduMasters() + ";tableName:" + tableInfo.getTableName(); + LOG.warn("get conn fail, wait for 5 sec and try again, 
connInfo:" + connInfo); + Thread.sleep(5 * 1000); + } catch (InterruptedException e1) { + e1.printStackTrace(); + } + } + } + //load data from table + assert scanner != null; + String[] sideFieldNames = sideInfo.getSideSelectFields().split(","); + + + while (scanner.hasMoreRows()) { + RowResultIterator results = scanner.nextRows(); + while (results.hasNext()) { + RowResult result = results.next(); + Map oneRow = Maps.newHashMap(); + for (String sideFieldName1 : sideFieldNames) { + String sideFieldName = sideFieldName1.trim(); + ColumnSchema columnSchema = table.getSchema().getColumn(sideFieldName); + if (null != columnSchema) { + setMapValue(columnSchema.getType(), oneRow, sideFieldName, result); + } + } + String cacheKey = buildKey(oneRow, sideInfo.getEqualFieldList()); + List> list = tmpCache.computeIfAbsent(cacheKey, key -> Lists.newArrayList()); + list.add(oneRow); + } + } + + } catch (Exception e) { + LOG.error("", e); + } finally { + if (null != scanner) { + try { + scanner.close(); + } catch (KuduException e) { + LOG.error("Error while closing scanner.", e); + } + } + //放置到close中关闭 每次刷新时间较长则可以选择在这里关闭 +// if (null != client) { +// try { +// client.close(); +// } catch (Exception e) { +// LOG.error("Error while closing client.", e); +// } +// } + + } + + + } + + private String buildKey(List equalValList) { + StringBuilder sb = new StringBuilder(""); + for (Object equalVal : equalValList) { + sb.append(equalVal).append("_"); + } + + return sb.toString(); + } + + private String buildKey(Map val, List equalFieldList) { + StringBuilder sb = new StringBuilder(""); + for (String equalField : equalFieldList) { + sb.append(val.get(equalField)).append("_"); + } + return sb.toString(); + } + + private KuduScanner getConn(KuduSideTableInfo tableInfo) { + try { + if (client == null) { + String kuduMasters = tableInfo.getKuduMasters(); + String tableName = tableInfo.getTableName(); + Integer workerCount = tableInfo.getWorkerCount(); + Integer defaultSocketReadTimeoutMs = 
tableInfo.getDefaultSocketReadTimeoutMs(); + Integer defaultOperationTimeoutMs = tableInfo.getDefaultOperationTimeoutMs(); + + Preconditions.checkNotNull(kuduMasters, "kuduMasters could not be null"); + + KuduClient.KuduClientBuilder kuduClientBuilder = new KuduClient.KuduClientBuilder(kuduMasters); + if (null != workerCount) { + kuduClientBuilder.workerCount(workerCount); + } + if (null != defaultSocketReadTimeoutMs) { + kuduClientBuilder.defaultSocketReadTimeoutMs(defaultSocketReadTimeoutMs); + } + + if (null != defaultOperationTimeoutMs) { + kuduClientBuilder.defaultOperationTimeoutMs(defaultOperationTimeoutMs); + } + client = kuduClientBuilder.build(); + + if (!client.tableExists(tableName)) { + throw new IllegalArgumentException("Table Open Failed , please check table exists"); + } + table = client.openTable(tableName); + } + Schema schema = table.getSchema(); + LOG.info("connect kudu is successed!"); + KuduScanner.KuduScannerBuilder tokenBuilder = client.newScannerBuilder(table); + return buildScanner(tokenBuilder, schema, tableInfo); + } catch ( + Exception e) { + LOG.error("connect kudu is error:" + e.getMessage()); + } + return null; + } + + + /** + * @param builder 创建AsyncKuduScanner对象 + * @param schema kudu中表约束 + * @param tableInfo AsyncKuduScanner的配置信息 + * @return + */ + private KuduScanner buildScanner(KuduScanner.KuduScannerBuilder builder, Schema schema, KuduSideTableInfo + tableInfo) { + Integer batchSizeBytes = tableInfo.getBatchSizeBytes(); + Long limitNum = tableInfo.getLimitNum(); + Boolean isFaultTolerant = tableInfo.getFaultTolerant(); + //查询需要的字段 + String[] sideFieldNames = sideInfo.getSideSelectFields().split(","); + //主键过滤条件 主键最小值 + String lowerBoundPrimaryKey = tableInfo.getLowerBoundPrimaryKey(); + //主键过滤条件 主键最大值 + String upperBoundPrimaryKey = tableInfo.getUpperBoundPrimaryKey(); + //主键字段 + String primaryKeys = tableInfo.getPrimaryKey(); + if (null == limitNum || limitNum <= 0) { + builder.limit(FETCH_SIZE); + } else { + 
builder.limit(limitNum); + } + if (null != batchSizeBytes) { + builder.batchSizeBytes(batchSizeBytes); + } + if (null != isFaultTolerant) { + builder.setFaultTolerant(isFaultTolerant); + } + + if (null != lowerBoundPrimaryKey && null != upperBoundPrimaryKey && null != primaryKeys) { + List columnSchemas = schema.getPrimaryKeyColumns(); + Map columnName = new HashMap(columnSchemas.size()); + for (int i = 0; i < columnSchemas.size(); i++) { + columnName.put(columnSchemas.get(i).getName(), i); + } + String[] primaryKey = splitString(primaryKeys); + String[] lowerBounds = splitString(lowerBoundPrimaryKey); + String[] upperBounds = splitString(upperBoundPrimaryKey); + for (int i = 0; i < primaryKey.length; i++) { + Integer index = columnName.get(primaryKey[i]); + if (null != index) { + builder.lowerBound(primaryKeyRange(columnSchemas.get(index).getType(), primaryKey[i], lowerBounds[i], schema)); + builder.exclusiveUpperBound(primaryKeyRange(columnSchemas.get(index).getType(), primaryKey[i], upperBounds[i], schema)); + } + } + } + List projectColumns = Arrays.asList(sideFieldNames); + return builder.setProjectedColumnNames(projectColumns).build(); + } + + private String[] splitString(String data) { + return data.split(","); + } + + private PartialRow primaryKeyRange(Type type, String primaryKey, String value, Schema schema) { + PartialRow partialRow = schema.newPartialRow(); + switch (type) { + case STRING: + partialRow.addString(primaryKey, value); + break; + case FLOAT: + partialRow.addFloat(primaryKey, Float.valueOf(value)); + break; + case INT8: + partialRow.addByte(primaryKey, Byte.valueOf(value)); + break; + case INT16: + partialRow.addShort(primaryKey, Short.valueOf(value)); + break; + case INT32: + partialRow.addInt(primaryKey, Integer.valueOf(value)); + break; + case INT64: + partialRow.addLong(primaryKey, Long.valueOf(value)); + break; + case DOUBLE: + partialRow.addDouble(primaryKey, Double.valueOf(value)); + break; + case BOOL: + 
partialRow.addBoolean(primaryKey, Boolean.valueOf(value)); + break; + case UNIXTIME_MICROS: + partialRow.addTimestamp(primaryKey, Timestamp.valueOf(value)); + break; + case BINARY: + partialRow.addBinary(primaryKey, value.getBytes()); + break; + default: + throw new IllegalArgumentException("Illegal var type: " + type); + } + return partialRow; + } + + private void setMapValue(Type type, Map oneRow, String sideFieldName, RowResult result) { + switch (type) { + case STRING: + oneRow.put(sideFieldName, result.getString(sideFieldName)); + break; + case FLOAT: + oneRow.put(sideFieldName, result.getFloat(sideFieldName)); + break; + case INT8: + oneRow.put(sideFieldName, result.getFloat(sideFieldName)); + break; + case INT16: + oneRow.put(sideFieldName, result.getShort(sideFieldName)); + break; + case INT32: + oneRow.put(sideFieldName, result.getInt(sideFieldName)); + break; + case INT64: + oneRow.put(sideFieldName, result.getLong(sideFieldName)); + break; + case DOUBLE: + oneRow.put(sideFieldName, result.getDouble(sideFieldName)); + break; + case BOOL: + oneRow.put(sideFieldName, result.getBoolean(sideFieldName)); + break; + case UNIXTIME_MICROS: + oneRow.put(sideFieldName, result.getTimestamp(sideFieldName)); + break; + case BINARY: + oneRow.put(sideFieldName, result.getBinary(sideFieldName)); + break; + default: + throw new IllegalArgumentException("Illegal var type: " + type); + } + } + + @Override + public void close() throws Exception { + //公用一个client 如果每次刷新间隔时间较长可以每次获取一个 + super.close(); + if (null != client) { + try { + client.close(); + } catch (Exception e) { + LOG.error("Error while closing client.", e); + } + } + } +} diff --git a/kudu/kudu-side/kudu-all-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAllSideInfo.java b/kudu/kudu-side/kudu-all-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAllSideInfo.java new file mode 100644 index 000000000..fd933a38a --- /dev/null +++ 
b/kudu/kudu-side/kudu-all-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAllSideInfo.java
@@ -0,0 +1,101 @@
+package com.dtstack.flink.sql.side.kudu;
+
+import com.dtstack.flink.sql.side.FieldInfo;
+import com.dtstack.flink.sql.side.JoinInfo;
+import com.dtstack.flink.sql.side.SideInfo;
+import com.dtstack.flink.sql.side.SideTableInfo;
+import com.dtstack.flink.sql.side.kudu.table.KuduSideTableInfo;
+import org.apache.calcite.sql.SqlBasicCall;
+import org.apache.calcite.sql.SqlKind;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.commons.collections.CollectionUtils;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.calcite.shaded.com.google.common.collect.Lists;
+
+import java.util.List;
+
+/**
+ * Join metadata for a Kudu side table in full-cache mode: records which output columns come from
+ * the stream and which from the side table, and which side columns have to be selected.
+ */
+public class KuduAllSideInfo extends SideInfo {
+
+    private static final long serialVersionUID = 9005389633060174746L;
+
+    public KuduAllSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List<FieldInfo> outFieldInfoList, SideTableInfo sideTableInfo) {
+        super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo);
+    }
+
+    /**
+     * Fills {@code sqlCondition} with a select statement over the side table's selected fields and
+     * prints it to standard output.
+     */
+    @Override
+    public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) {
+        KuduSideTableInfo kuduTable = (KuduSideTableInfo) sideTableInfo;
+
+        sqlCondition = "select ${selectField} from ${tableName} ";
+        String tableName = kuduTable.getTableName();
+        sqlCondition = sqlCondition.replace("${tableName}", tableName).replace("${selectField}", sideSelectFields);
+        System.out.println("---------side_exe_sql-----\n" + sqlCondition);
+    }
+
+    /**
+     * Splits the output fields between the stream and the side table, collects the join columns
+     * from the join condition, and stores the comma-joined side columns in {@code sideSelectFields}.
+     *
+     * @throws RuntimeException if an output field belongs to neither table, no side-table field
+     *                          is selected, or no join column is found
+     */
+    @Override
+    public void parseSelectFields(JoinInfo joinInfo) {
+        String sideTable = joinInfo.getSideTableName();
+        String streamTable = joinInfo.getNonSideTable();
+        List<String> selected = Lists.newArrayList();
+
+        int sidePos = 0;
+        for (int outPos = 0; outPos < outFieldInfoList.size(); outPos++) {
+            FieldInfo outField = outFieldInfoList.get(outPos);
+            if (outField.getTable().equalsIgnoreCase(sideTable)) {
+                selected.add(outField.getFieldName());
+                sideFieldIndex.put(outPos, sidePos);
+                sideFieldNameIndex.put(outPos, outField.getFieldName());
+                sidePos++;
+                continue;
+            }
+            if (!outField.getTable().equalsIgnoreCase(streamTable)) {
+                throw new RuntimeException("unknown table " + outField.getTable());
+            }
+            inFieldIndex.put(outPos, rowTypeInfo.getFieldIndex(outField.getFieldName()));
+        }
+
+        if (selected.isEmpty()) {
+            throw new RuntimeException("select non field from table " + sideTable);
+        }
+
+        //the join condition is either a single comparison or an AND of several
+        SqlNode condition = joinInfo.getCondition();
+        List<SqlNode> conditionParts = Lists.newArrayList();
+        if (condition.getKind() == SqlKind.AND) {
+            conditionParts.addAll(Lists.newArrayList(((SqlBasicCall) condition).getOperands()));
+        } else {
+            conditionParts.add(condition);
+        }
+        for (SqlNode part : conditionParts) {
+            dealOneEqualCon(part, sideTable);
+        }
+
+        if (CollectionUtils.isEmpty(equalFieldList)) {
+            throw new RuntimeException("no join condition found after table " + joinInfo.getLeftTableName());
+        }
+
+        //join columns have to be selected as well, without duplicating ones already present
+        for (String joinColumn : equalFieldList) {
+            if (!selected.contains(joinColumn)) {
+                selected.add(joinColumn);
+            }
+        }
+
+        sideSelectFields = String.join(",", selected);
+    }
+}
diff --git a/kudu/kudu-side/kudu-async-side/pom.xml b/kudu/kudu-side/kudu-async-side/pom.xml new file mode 100644 index 000000000..6ab098ea9 --- /dev/null +++ b/kudu/kudu-side/kudu-async-side/pom.xml @@ -0,0 +1,107 @@ + + + + sql.side.kudu + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.async.kudu + + kudu-async-side + + jar + + + + + + + io.vertx + vertx-jdbc-client + 3.5.2 + + + + io.vertx + vertx-core + 3.5.2 + + + + + com.dtstack.flink + sql.side.kudu.core + 1.0-SNAPSHOT + + + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + 
+ + + + + + + + + + + + + \ No newline at end of file diff --git a/kudu/kudu-side/kudu-async-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAsyncReqRow.java b/kudu/kudu-side/kudu-async-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAsyncReqRow.java new file mode 100644 index 000000000..10fee59f4 --- /dev/null +++ b/kudu/kudu-side/kudu-async-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAsyncReqRow.java @@ -0,0 +1,320 @@ +package com.dtstack.flink.sql.side.kudu; + +import com.dtstack.flink.sql.enums.ECacheContentType; +import com.dtstack.flink.sql.side.*; +import com.dtstack.flink.sql.side.cache.CacheObj; +import com.dtstack.flink.sql.side.kudu.table.KuduSideTableInfo; +import com.stumbleupon.async.Callback; +import com.stumbleupon.async.Deferred; +import io.vertx.core.json.JsonArray; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import org.apache.flink.streaming.api.functions.async.ResultFuture; +import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; +import org.apache.flink.types.Row; +import org.apache.flink.util.Preconditions; +import org.apache.kudu.ColumnSchema; +import org.apache.kudu.Schema; +import org.apache.kudu.Type; +import org.apache.kudu.client.*; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Timestamp; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class KuduAsyncReqRow extends AsyncReqRow { + + private static final Logger LOG = LoggerFactory.getLogger(KuduAsyncReqRow.class); + /** + * 获取连接的尝试次数 + */ + private static final int CONN_RETRY_NUM = 3; + /** + * 缓存条数 + */ + private static final Long FETCH_SIZE = 1000L; + + private static final long serialVersionUID = 5028583854989267753L; + + + private AsyncKuduClient 
asyncClient; + + private KuduTable table; + + private KuduSideTableInfo kuduSideTableInfo; + + private AsyncKuduScanner.AsyncKuduScannerBuilder scannerBuilder; + + public KuduAsyncReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(new KuduAsyncSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + } + + @Override + public void open(Configuration parameters) throws Exception { + super.open(parameters); + kuduSideTableInfo = (KuduSideTableInfo) sideInfo.getSideTableInfo(); + connKuDu(); + } + + /** + * 连接kudu中的表 + * + * @throws KuduException + */ + private void connKuDu() throws KuduException { + if (null == table) { + String kuduMasters = kuduSideTableInfo.getKuduMasters(); + String tableName = kuduSideTableInfo.getTableName(); + Integer workerCount = kuduSideTableInfo.getWorkerCount(); + Integer defaultSocketReadTimeoutMs = kuduSideTableInfo.getDefaultSocketReadTimeoutMs(); + Integer defaultOperationTimeoutMs = kuduSideTableInfo.getDefaultOperationTimeoutMs(); + + Preconditions.checkNotNull(kuduMasters, "kuduMasters could not be null"); + + AsyncKuduClient.AsyncKuduClientBuilder asyncKuduClientBuilder = new AsyncKuduClient.AsyncKuduClientBuilder(kuduMasters); + if (null != workerCount) { + asyncKuduClientBuilder.workerCount(workerCount); + } + if (null != defaultSocketReadTimeoutMs) { + asyncKuduClientBuilder.defaultSocketReadTimeoutMs(defaultSocketReadTimeoutMs); + } + + if (null != defaultOperationTimeoutMs) { + asyncKuduClientBuilder.defaultOperationTimeoutMs(defaultOperationTimeoutMs); + } + asyncClient = asyncKuduClientBuilder.build(); + if (!asyncClient.syncClient().tableExists(tableName)) { + throw new IllegalArgumentException("Table Open Failed , please check table exists"); + } + table = asyncClient.syncClient().openTable(tableName); + LOG.info("connect kudu is successed!"); + } + scannerBuilder = asyncClient.newScannerBuilder(table); + Integer batchSizeBytes = 
kuduSideTableInfo.getBatchSizeBytes(); + Long limitNum = kuduSideTableInfo.getLimitNum(); + Boolean isFaultTolerant = kuduSideTableInfo.getFaultTolerant(); + //查询需要的字段 + String[] sideFieldNames = sideInfo.getSideSelectFields().split(","); + + if (null == limitNum || limitNum <= 0) { + scannerBuilder.limit(FETCH_SIZE); + } else { + scannerBuilder.limit(limitNum); + } + if (null != batchSizeBytes) { + scannerBuilder.batchSizeBytes(batchSizeBytes); + } + if (null != isFaultTolerant) { + scannerBuilder.setFaultTolerant(isFaultTolerant); + } + + List projectColumns = Arrays.asList(sideFieldNames); + scannerBuilder.setProjectedColumnNames(projectColumns); + } + + + @Override + public void asyncInvoke(Row input, ResultFuture resultFuture) throws Exception { + //scannerBuilder 设置为null重新加载过滤条件 + scannerBuilder = null; + connKuDu(); + JsonArray inputParams = new JsonArray(); + Schema schema = table.getSchema(); + + for (Integer conValIndex : sideInfo.getEqualValIndex()) { + Object equalObj = input.getField(conValIndex); + if (equalObj == null) { + resultFuture.complete(null); + return; + } + //增加过滤条件 + scannerBuilder.addPredicate(KuduPredicate.newInListPredicate(schema.getColumn(sideInfo.getEqualFieldList().get(conValIndex)), Collections.singletonList(equalObj))); + inputParams.add(equalObj); + } + + String key = buildCacheKey(inputParams); + + if (openCache()) { + //判断数据是否已经加载到缓存中 + CacheObj val = getFromCache(key); + if (val != null) { + + if (ECacheContentType.MissVal == val.getType()) { + dealMissKey(input, resultFuture); + return; + } else if (ECacheContentType.SingleLine == val.getType()) { + Row row = fillData(input, val); + resultFuture.complete(Collections.singleton(row)); + } else if (ECacheContentType.MultiLine == val.getType()) { + List rowList = Lists.newArrayList(); + for (Object jsonArray : (List) val.getContent()) { + Row row = fillData(input, jsonArray); + rowList.add(row); + } + resultFuture.complete(rowList); + } else { + throw new RuntimeException("not 
support cache obj type " + val.getType()); + } + return; + } + } + List> cacheContent = Lists.newArrayList(); + AsyncKuduScanner asyncKuduScanner = scannerBuilder.build(); + List rowList = Lists.newArrayList(); + Deferred data = asyncKuduScanner.nextRows(); + //从之前的同步修改为调用异步的Callback + data.addCallbackDeferring(new GetListRowCB(input, cacheContent, rowList, asyncKuduScanner, resultFuture, key)); + } + + + @Override + public Row fillData(Row input, Object sideInput) { + Map cacheInfo = (Map) sideInput; + Row row = new Row(sideInfo.getOutFieldInfoList().size()); + for (Map.Entry entry : sideInfo.getInFieldIndex().entrySet()) { + Object obj = input.getField(entry.getValue()); + boolean isTimeIndicatorTypeInfo = TimeIndicatorTypeInfo.class.isAssignableFrom(sideInfo.getRowTypeInfo().getTypeAt(entry.getValue()).getClass()); + + //Type information for indicating event or processing time. However, it behaves like a regular SQL timestamp but is serialized as Long. + if (obj instanceof Timestamp && isTimeIndicatorTypeInfo) { + obj = ((Timestamp) obj).getTime(); + } + row.setField(entry.getKey(), obj); + } + + for (Map.Entry entry : sideInfo.getSideFieldNameIndex().entrySet()) { + if (cacheInfo == null) { + row.setField(entry.getKey(), null); + } else { + row.setField(entry.getKey(), cacheInfo.get(entry.getValue())); + } + } + + return row; + } + + public String buildCacheKey(JsonArray jsonArray) { + StringBuilder sb = new StringBuilder(); + for (Object ele : jsonArray.getList()) { + sb.append(ele.toString()) + .append("_"); + } + + return sb.toString(); + } + + @Override + public void close() throws Exception { + super.close(); + if (null != asyncClient) { + try { + asyncClient.close(); + } catch (Exception e) { + LOG.error("Error while closing client.", e); + } + } + } + + private void setMapValue(Type type, Map oneRow, String sideFieldName, RowResult result) { + switch (type) { + case STRING: + oneRow.put(sideFieldName, result.getString(sideFieldName)); + break; + case 
FLOAT: + oneRow.put(sideFieldName, result.getFloat(sideFieldName)); + break; + case INT8: + oneRow.put(sideFieldName, result.getFloat(sideFieldName)); + break; + case INT16: + oneRow.put(sideFieldName, result.getShort(sideFieldName)); + break; + case INT32: + oneRow.put(sideFieldName, result.getInt(sideFieldName)); + break; + case INT64: + oneRow.put(sideFieldName, result.getLong(sideFieldName)); + break; + case DOUBLE: + oneRow.put(sideFieldName, result.getDouble(sideFieldName)); + break; + case BOOL: + oneRow.put(sideFieldName, result.getBoolean(sideFieldName)); + break; + case UNIXTIME_MICROS: + oneRow.put(sideFieldName, result.getTimestamp(sideFieldName)); + break; + case BINARY: + oneRow.put(sideFieldName, result.getBinary(sideFieldName)); + break; + default: + throw new IllegalArgumentException("Illegal var type: " + type); + } + } + + class GetListRowCB implements Callback>, RowResultIterator> { + private Row input; + private List> cacheContent; + private List rowList; + private AsyncKuduScanner asyncKuduScanner; + private ResultFuture resultFuture; + private String key; + + + public GetListRowCB() { + } + + GetListRowCB(Row input, List> cacheContent, List rowList, AsyncKuduScanner asyncKuduScanner, ResultFuture resultFuture, String key) { + this.input = input; + this.cacheContent = cacheContent; + this.rowList = rowList; + this.asyncKuduScanner = asyncKuduScanner; + this.resultFuture = resultFuture; + this.key = key; + } + + @Override + public Deferred> call(RowResultIterator results) throws Exception { + for (RowResult result : results) { + Map oneRow = Maps.newHashMap(); + for (String sideFieldName1 : sideInfo.getSideSelectFields().split(",")) { + String sideFieldName = sideFieldName1.trim(); + ColumnSchema columnSchema = table.getSchema().getColumn(sideFieldName); + if (null != columnSchema) { + setMapValue(columnSchema.getType(), oneRow, sideFieldName, result); + } + } + Row row = fillData(input, oneRow); + if (openCache()) { + cacheContent.add(oneRow); 
+ } + rowList.add(row); + } + if (asyncKuduScanner.hasMoreRows()) { + return asyncKuduScanner.nextRows().addCallbackDeferring(this); + } + + if (rowList.size() > 0) { + if (openCache()) { + putCache(key, CacheObj.buildCacheObj(ECacheContentType.MultiLine, cacheContent)); + } + resultFuture.complete(rowList); + } else { + dealMissKey(input, resultFuture); + if (openCache()) { + //放置在putCache的Miss中 一段时间内同一个key都会直接返回 + putCache(key, CacheMissVal.getMissKeyObj()); + } + } + + return null; + } + } + +} diff --git a/kudu/kudu-side/kudu-async-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAsyncSideInfo.java b/kudu/kudu-side/kudu-async-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAsyncSideInfo.java new file mode 100644 index 000000000..f341f9a8a --- /dev/null +++ b/kudu/kudu-side/kudu-async-side/src/main/java/com/dtstack/flink/sql/side/kudu/KuduAsyncSideInfo.java @@ -0,0 +1,99 @@ +package com.dtstack.flink.sql.side.kudu; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.kudu.table.KuduSideTableInfo; +import org.apache.calcite.sql.SqlBasicCall; +import org.apache.calcite.sql.SqlIdentifier; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.SqlNode; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; + +import java.util.List; + +public class KuduAsyncSideInfo extends SideInfo { + + + public KuduAsyncSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } + + @Override + public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) { + KuduSideTableInfo kuduSideTableInfo = (KuduSideTableInfo) sideTableInfo; + + String sideTableName = joinInfo.getSideTableName(); 
+ + SqlNode conditionNode = joinInfo.getCondition(); + + List sqlNodeList = Lists.newArrayList(); + if (conditionNode.getKind() == SqlKind.AND) { + sqlNodeList.addAll(Lists.newArrayList(((SqlBasicCall) conditionNode).getOperands())); + } else { + sqlNodeList.add(conditionNode); + } + + for (SqlNode sqlNode : sqlNodeList) { + dealOneEqualCon(sqlNode, sideTableName); + } + + sqlCondition = "select ${selectField} from ${tableName} "; + sqlCondition = sqlCondition.replace("${tableName}", kuduSideTableInfo.getTableName()).replace("${selectField}", sideSelectFields); + System.out.println("---------side_exe_sql-----\n" + sqlCondition); + } + + @Override + public void dealOneEqualCon(SqlNode sqlNode, String sideTableName) { + if (sqlNode.getKind() != SqlKind.EQUALS) { + throw new RuntimeException("not equal operator."); + } + + SqlIdentifier left = (SqlIdentifier) ((SqlBasicCall) sqlNode).getOperands()[0]; + SqlIdentifier right = (SqlIdentifier) ((SqlBasicCall) sqlNode).getOperands()[1]; + + String leftTableName = left.getComponent(0).getSimple(); + String leftField = left.getComponent(1).getSimple(); + + String rightTableName = right.getComponent(0).getSimple(); + String rightField = right.getComponent(1).getSimple(); + + if (leftTableName.equalsIgnoreCase(sideTableName)) { + equalFieldList.add(leftField); + int equalFieldIndex = -1; + for (int i = 0; i < rowTypeInfo.getFieldNames().length; i++) { + String fieldName = rowTypeInfo.getFieldNames()[i]; + if (fieldName.equalsIgnoreCase(rightField)) { + equalFieldIndex = i; + } + } + if (equalFieldIndex == -1) { + throw new RuntimeException("can't deal equal field: " + sqlNode); + } + + equalValIndex.add(equalFieldIndex); + + } else if (rightTableName.equalsIgnoreCase(sideTableName)) { + + equalFieldList.add(rightField); + int equalFieldIndex = -1; + for (int i = 0; i < rowTypeInfo.getFieldNames().length; i++) { + String fieldName = rowTypeInfo.getFieldNames()[i]; + if (fieldName.equalsIgnoreCase(leftField)) { + 
equalFieldIndex = i; + } + } + if (equalFieldIndex == -1) { + throw new RuntimeException("can't deal equal field: " + sqlNode.toString()); + } + + equalValIndex.add(equalFieldIndex); + + } else { + throw new RuntimeException("resolve equalFieldList error:" + sqlNode.toString()); + } + + } +} diff --git a/kudu/kudu-side/kudu-side-core/pom.xml b/kudu/kudu-side/kudu-side-core/pom.xml new file mode 100644 index 000000000..3d0a28ac4 --- /dev/null +++ b/kudu/kudu-side/kudu-side-core/pom.xml @@ -0,0 +1,24 @@ + + + + sql.side.kudu + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.kudu.core + + + + com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + jar + \ No newline at end of file diff --git a/kudu/kudu-side/kudu-side-core/src/main/java/com/dtstack/flink/sql/side/kudu/table/KuduSideParser.java b/kudu/kudu-side/kudu-side-core/src/main/java/com/dtstack/flink/sql/side/kudu/table/KuduSideParser.java new file mode 100644 index 000000000..ea451286c --- /dev/null +++ b/kudu/kudu-side/kudu-side-core/src/main/java/com/dtstack/flink/sql/side/kudu/table/KuduSideParser.java @@ -0,0 +1,74 @@ +package com.dtstack.flink.sql.side.kudu.table; + +import com.dtstack.flink.sql.table.AbsSideTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +import static com.dtstack.flink.sql.table.TableInfo.PARALLELISM_KEY; + +public class KuduSideParser extends AbsSideTableParser { + + + public static final String KUDU_MASTERS = "kuduMasters"; + + public static final String TABLE_NAME = "tableName"; + + public static final String WORKER_COUNT = "workerCount"; + + public static final String OPERATION_TIMEOUT_MS = "defaultOperationTimeoutMs"; + + public static final String SOCKET_READ_TIMEOUT_MS = "defaultSocketReadTimeoutMs"; + + /** + * 查询返回的最大字节数 + */ + public static final String BATCH_SIZE_BYTES = "batchSizeBytes"; + /** + * 查询返回数据条数 + */ + public static final String LIMIT_NUM = "limitNum"; + + 
/** + * 查询是否容错 查询失败是否扫描第二个副本 默认false 容错 + */ + public static final String IS_FAULT_TO_LERANT = "isFaultTolerant"; + /** + * 需要过滤的主键 + */ + public static final String PRIMARY_KEY = "primaryKey"; + /** + * 过滤主键的最小值 + */ + public static final String LOWER_BOUND_PRIMARY_KEY = "lowerBoundPrimaryKey"; + /** + * 过滤主键的最大值 不包含 + */ + public static final String UPPER_BOUND_PRIMARY_KEY = "upperBoundPrimaryKey"; + + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + KuduSideTableInfo kuduSideTableInfo = new KuduSideTableInfo(); + kuduSideTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kuduSideTableInfo); + + parseCacheProp(kuduSideTableInfo, props); + + kuduSideTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(PARALLELISM_KEY.toLowerCase()))); + kuduSideTableInfo.setKuduMasters(MathUtil.getString(props.get(KUDU_MASTERS.toLowerCase()))); + kuduSideTableInfo.setTableName(MathUtil.getString(props.get(TABLE_NAME.toLowerCase()))); + kuduSideTableInfo.setWorkerCount(MathUtil.getIntegerVal(props.get(WORKER_COUNT.toLowerCase()))); + kuduSideTableInfo.setDefaultOperationTimeoutMs(MathUtil.getIntegerVal(props.get(OPERATION_TIMEOUT_MS.toLowerCase()))); + kuduSideTableInfo.setDefaultSocketReadTimeoutMs(MathUtil.getIntegerVal(props.get(SOCKET_READ_TIMEOUT_MS.toLowerCase()))); + kuduSideTableInfo.setBatchSizeBytes(MathUtil.getIntegerVal(props.get(BATCH_SIZE_BYTES.toLowerCase()))); + kuduSideTableInfo.setLimitNum(MathUtil.getLongVal(props.get(LIMIT_NUM.toLowerCase()))); + kuduSideTableInfo.setFaultTolerant(MathUtil.getBoolean(props.get(IS_FAULT_TO_LERANT.toLowerCase()))); + kuduSideTableInfo.setPrimaryKey(MathUtil.getString(props.get(PRIMARY_KEY.toLowerCase()))); + kuduSideTableInfo.setLowerBoundPrimaryKey(MathUtil.getString(props.get(LOWER_BOUND_PRIMARY_KEY.toLowerCase()))); + kuduSideTableInfo.setUpperBoundPrimaryKey(MathUtil.getString(props.get(UPPER_BOUND_PRIMARY_KEY.toLowerCase()))); + return kuduSideTableInfo; + + } +} 
diff --git a/kudu/kudu-side/kudu-side-core/src/main/java/com/dtstack/flink/sql/side/kudu/table/KuduSideTableInfo.java b/kudu/kudu-side/kudu-side-core/src/main/java/com/dtstack/flink/sql/side/kudu/table/KuduSideTableInfo.java new file mode 100644 index 000000000..478140433 --- /dev/null +++ b/kudu/kudu-side/kudu-side-core/src/main/java/com/dtstack/flink/sql/side/kudu/table/KuduSideTableInfo.java @@ -0,0 +1,155 @@ +package com.dtstack.flink.sql.side.kudu.table; + +import com.dtstack.flink.sql.side.SideTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +public class KuduSideTableInfo extends SideTableInfo { + + private static final String CURR_TYPE = "kudu"; + + private static final long serialVersionUID = 1085582743577521861L; + + private String kuduMasters; + + private String tableName; + + private Integer workerCount; + + private Integer defaultOperationTimeoutMs; + + private Integer defaultSocketReadTimeoutMs; + /** + * 查询返回的最大字节数 + */ + private Integer batchSizeBytes; + + /** + * 查询返回数据条数 + */ + private Long limitNum; + /** + * 查询是否容错 查询失败是否扫描第二个副本 默认false 容错 + */ + private Boolean isFaultTolerant; + + /** + * 需要过滤的主键 + */ + private String primaryKey; + /** + * 过滤主键的最小值 + */ + private String lowerBoundPrimaryKey; + /** + * 过滤主键的最大值 不包含 + */ + private String upperBoundPrimaryKey; + + + public KuduSideTableInfo() { + setType(CURR_TYPE); + } + + + public String getKuduMasters() { + return kuduMasters; + } + + public void setKuduMasters(String kuduMasters) { + this.kuduMasters = kuduMasters; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public Integer getWorkerCount() { + return workerCount; + } + + public void setWorkerCount(Integer workerCount) { + this.workerCount = workerCount; + } + + public Integer getDefaultOperationTimeoutMs() { + return defaultOperationTimeoutMs; + } + + public void 
setDefaultOperationTimeoutMs(Integer defaultOperationTimeoutMs) { + this.defaultOperationTimeoutMs = defaultOperationTimeoutMs; + } + + public Integer getDefaultSocketReadTimeoutMs() { + return defaultSocketReadTimeoutMs; + } + + public void setDefaultSocketReadTimeoutMs(Integer defaultSocketReadTimeoutMs) { + this.defaultSocketReadTimeoutMs = defaultSocketReadTimeoutMs; + } + + public Integer getBatchSizeBytes() { + return batchSizeBytes; + } + + public void setBatchSizeBytes(Integer batchSizeBytes) { + this.batchSizeBytes = batchSizeBytes; + } + + public Long getLimitNum() { + return limitNum; + } + + public void setLimitNum(Long limitNum) { + this.limitNum = limitNum; + } + + public Boolean getFaultTolerant() { + return isFaultTolerant; + } + + public void setFaultTolerant(Boolean faultTolerant) { + isFaultTolerant = faultTolerant; + } + + public String getLowerBoundPrimaryKey() { + return lowerBoundPrimaryKey; + } + + public void setLowerBoundPrimaryKey(String lowerBoundPrimaryKey) { + this.lowerBoundPrimaryKey = lowerBoundPrimaryKey; + } + + public String getUpperBoundPrimaryKey() { + return upperBoundPrimaryKey; + } + + public void setUpperBoundPrimaryKey(String upperBoundPrimaryKey) { + this.upperBoundPrimaryKey = upperBoundPrimaryKey; + } + + public String getPrimaryKey() { + return primaryKey; + } + + public void setPrimaryKey(String primaryKey) { + this.primaryKey = primaryKey; + } + + + @Override + public boolean check() { + Preconditions.checkNotNull(kuduMasters, "Cassandra field of kuduMasters is required"); + Preconditions.checkNotNull(tableName, "Cassandra field of tableName is required"); + return true; + } + + @Override + public String getType() { + // return super.getType().toLowerCase() + TARGET_SUFFIX; + return super.getType().toLowerCase(); + } +} diff --git a/kudu/kudu-side/pom.xml b/kudu/kudu-side/pom.xml new file mode 100644 index 000000000..db3ea5965 --- /dev/null +++ b/kudu/kudu-side/pom.xml @@ -0,0 +1,23 @@ + + + + sql.kudu + 
com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.kudu + kudu-side + + kudu-side-core + kudu-all-side + kudu-async-side + + + + pom + \ No newline at end of file diff --git a/kudu/kudu-sink/pom.xml b/kudu/kudu-sink/pom.xml new file mode 100644 index 000000000..9c1bb2a6d --- /dev/null +++ b/kudu/kudu-sink/pom.xml @@ -0,0 +1,81 @@ + + + + sql.kudu + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.kudu + jar + + + kudu-sink + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + org.slf4j + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/KuduOutputFormat.java b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/KuduOutputFormat.java new file mode 100644 index 000000000..05259ee9b --- /dev/null +++ b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/KuduOutputFormat.java @@ -0,0 +1,308 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.sink.kudu; + +import com.dtstack.flink.sql.sink.MetricOutputFormat; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.configuration.Configuration; +import org.apache.flink.types.Row; +import org.apache.kudu.client.*; + +import java.io.IOException; +import java.math.BigDecimal; +import java.sql.Timestamp; +import java.util.Date; + +public class KuduOutputFormat extends MetricOutputFormat { + + public enum WriteMode {INSERT, UPDATE, UPSERT} + +// public enum Consistency {EVENTUAL, STRONG} + + private String kuduMasters; + + private String tableName; + + private WriteMode writeMode; + + protected String[] fieldNames; + + TypeInformation[] fieldTypes; + +// protected List primaryKeys; + +// private Consistency consistency = Consistency.STRONG; + + private AsyncKuduClient client; + + private KuduTable table; + + + private Integer workerCount; + + private Integer defaultOperationTimeoutMs; + + private Integer defaultSocketReadTimeoutMs; + + + private KuduOutputFormat() { + } + + @Override + public void configure(Configuration parameters) { + + } + + @Override + public void open(int taskNumber, int numTasks) throws IOException { + establishConnection(); + initMetric(); + } + + + private void establishConnection() throws KuduException { + AsyncKuduClient.AsyncKuduClientBuilder asyncKuduClientBuilder = new AsyncKuduClient.AsyncKuduClientBuilder(kuduMasters); + if (null != workerCount) { + asyncKuduClientBuilder.workerCount(workerCount); + } + if (null != defaultSocketReadTimeoutMs) { + asyncKuduClientBuilder.workerCount(defaultSocketReadTimeoutMs); + } + + if (null != defaultOperationTimeoutMs) { + asyncKuduClientBuilder.workerCount(defaultOperationTimeoutMs); + } + client = asyncKuduClientBuilder.build(); + KuduClient syncClient = client.syncClient(); + + if (syncClient.tableExists(tableName)) { + table = syncClient.openTable(tableName); + } + } + + 
@Override + public void writeRecord(Tuple2 record) throws IOException { + Tuple2 tupleTrans = record; + Boolean retract = tupleTrans.getField(0); + if (!retract) { + return; + } + Row row = tupleTrans.getField(1); + if (row.getArity() != fieldNames.length) { + return; + } + + Operation operation = toOperation(writeMode, row); + AsyncKuduSession session = client.newSession(); + session.apply(operation); + session.close(); + outRecords.inc(); + } + + @Override + public void close() { + if (null != client) { + try { + client.close(); + } catch (Exception e) { + throw new IllegalArgumentException("[closeKudu]:" + e.getMessage()); + } + } + } + + public static KuduOutputFormatBuilder buildKuduOutputFormat() { + return new KuduOutputFormatBuilder(); + } + + public static class KuduOutputFormatBuilder { + private final KuduOutputFormat kuduOutputFormat; + + protected KuduOutputFormatBuilder() { + this.kuduOutputFormat = new KuduOutputFormat(); + } + + public KuduOutputFormatBuilder setKuduMasters(String kuduMasters) { + kuduOutputFormat.kuduMasters = kuduMasters; + return this; + } + + public KuduOutputFormatBuilder setTableName(String tableName) { + kuduOutputFormat.tableName = tableName; + return this; + } + + + public KuduOutputFormatBuilder setFieldNames(String[] fieldNames) { + kuduOutputFormat.fieldNames = fieldNames; + return this; + } + + public KuduOutputFormatBuilder setFieldTypes(TypeInformation[] fieldTypes) { + kuduOutputFormat.fieldTypes = fieldTypes; + return this; + } +// +// public KuduOutputFormatBuilder setPrimaryKeys(List primaryKeys) { +// kuduOutputFormat.primaryKeys = primaryKeys; +// return this; +// } + + public KuduOutputFormatBuilder setWriteMode(WriteMode writeMode) { + if (null == writeMode) { + kuduOutputFormat.writeMode = WriteMode.UPSERT; + } + kuduOutputFormat.writeMode = writeMode; + return this; + } + + public KuduOutputFormatBuilder setWorkerCount(Integer workerCount) { + kuduOutputFormat.workerCount = workerCount; + return this; + } + 
+// public KuduOutputFormatBuilder setConsistency(String consistency) { +// switch (consistency) { +// case "EVENTUAL": +// kuduOutputFormat.consistency = Consistency.EVENTUAL; +// break; +// case "STRONG": +// kuduOutputFormat.consistency = Consistency.STRONG; +// break; +// default: +// kuduOutputFormat.consistency = Consistency.STRONG; +// } +// return this; +// } + + + public KuduOutputFormatBuilder setDefaultOperationTimeoutMs(Integer defaultOperationTimeoutMs) { + kuduOutputFormat.defaultOperationTimeoutMs = defaultOperationTimeoutMs; + return this; + } + + public KuduOutputFormatBuilder setDefaultSocketReadTimeoutMs(Integer defaultSocketReadTimeoutMs) { + kuduOutputFormat.defaultSocketReadTimeoutMs = defaultSocketReadTimeoutMs; + return this; + } + + + public KuduOutputFormat finish() { + if (kuduOutputFormat.kuduMasters == null) { + throw new IllegalArgumentException("No kuduMasters supplied."); + } + + if (kuduOutputFormat.tableName == null) { + throw new IllegalArgumentException("No tablename supplied."); + } + + return kuduOutputFormat; + } + } + + private Operation toOperation(WriteMode writeMode, Row row) { + if (null == table) { + throw new IllegalArgumentException("Table Open Failed , please check table exists"); + } + Operation operation = toOperation(writeMode); + PartialRow partialRow = operation.getRow(); + + for (int index = 0; index < row.getArity(); index++) { + //解决kudu中全小写字段找不到的bug + String fieldName = fieldNames[index].toLowerCase(); + if (row.getField(index) == null) { + partialRow.setNull(fieldName); + } else { + if (fieldTypes[index].getTypeClass() == String.class) { + partialRow.addString(fieldName, (String) row.getField(index)); + continue; + } + if (fieldTypes[index].getTypeClass() == Float.class) { + partialRow.addFloat(fieldName, (Float) row.getField(index)); + continue; + } + if (fieldTypes[index].getTypeClass() == Byte.class) { + partialRow.addByte(fieldName, (Byte) row.getField(index)); + continue; + } + + if 
(fieldTypes[index].getTypeClass() == Short.class) { + partialRow.addShort(fieldName, (Short) row.getField(index)); + continue; + } + + if (fieldTypes[index].getTypeClass() == Integer.class) { + partialRow.addInt(fieldName, (Integer) row.getField(index)); + continue; + } + + if (fieldTypes[index].getTypeClass() == Long.class) { + partialRow.addLong(fieldName, (Long) row.getField(index)); + continue; + } + + if (fieldTypes[index].getTypeClass() == Double.class) { + partialRow.addDouble(fieldName, (Double) row.getField(index)); + continue; + } + + if (fieldTypes[index].getTypeClass() == BigDecimal.class) { + partialRow.addDecimal(fieldName, (BigDecimal) row.getField(index)); + continue; + } + if (fieldTypes[index].getTypeClass() == Boolean.class) { + partialRow.addBoolean(fieldName, (Boolean) row.getField(index)); + continue; + } + + if (fieldTypes[index].getTypeClass() == Date.class) { + partialRow.addTimestamp(fieldName, new Timestamp(((Date) row.getField(index)).getTime())); + continue; + } + + if (fieldTypes[index].getTypeClass() == Timestamp.class) { + partialRow.addTimestamp(fieldName, (Timestamp) row.getField(index)); + continue; + } + + if (fieldTypes[index].getTypeClass() == byte[].class) { + partialRow.addBinary(fieldName, (byte[]) row.getField(index)); + continue; + } + throw new IllegalArgumentException("Illegal var type: " + fieldTypes[index]); + } + } + return operation; + + } + + private Operation toOperation(WriteMode writeMode) { + switch (writeMode) { + case INSERT: + return table.newInsert(); + case UPDATE: + return table.newUpdate(); + case UPSERT: + return table.newUpsert(); + default: + return table.newUpsert(); + } + } + +} \ No newline at end of file diff --git a/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/KuduSink.java b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/KuduSink.java new file mode 100644 index 000000000..24dff85ae --- /dev/null +++ 
b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/KuduSink.java @@ -0,0 +1,104 @@ +package com.dtstack.flink.sql.sink.kudu; + + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.kudu.table.KuduTableInfo; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.java.tuple.Tuple2; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.api.java.typeutils.TupleTypeInfo; +import org.apache.flink.streaming.api.datastream.DataStream; +import org.apache.flink.streaming.api.functions.sink.OutputFormatSinkFunction; +import org.apache.flink.streaming.api.functions.sink.RichSinkFunction; +import org.apache.flink.table.sinks.RetractStreamTableSink; +import org.apache.flink.table.sinks.TableSink; +import org.apache.flink.types.Row; + +import java.io.Serializable; + +public class KuduSink implements RetractStreamTableSink, Serializable, IStreamSinkGener { + + private String kuduMasters; + + private String tableName; + + private KuduOutputFormat.WriteMode writeMode; + + protected String[] fieldNames; + + TypeInformation[] fieldTypes; + +// protected List primaryKeys; + +// private KuduOutputFormat.Consistency consistency = KuduOutputFormat.Consistency.STRONG; + + + private Integer workerCount; + + private Integer defaultOperationTimeoutMs; + + private Integer defaultSocketReadTimeoutMs; + + private int parallelism = -1; + + @Override + public KuduSink genStreamSink(TargetTableInfo targetTableInfo) { + KuduTableInfo kuduTableInfo = (KuduTableInfo) targetTableInfo; + this.kuduMasters = kuduTableInfo.getKuduMasters(); + this.tableName = kuduTableInfo.getTableName(); + this.defaultOperationTimeoutMs = kuduTableInfo.getDefaultOperationTimeoutMs(); + this.defaultSocketReadTimeoutMs = kuduTableInfo.getDefaultSocketReadTimeoutMs(); + this.workerCount = kuduTableInfo.getWorkerCount(); + this.writeMode = 
kuduTableInfo.getWriteMode(); + + return this; + } + + @Override + public void emitDataStream(DataStream> dataStream) { + KuduOutputFormat.KuduOutputFormatBuilder builder = KuduOutputFormat.buildKuduOutputFormat(); + builder.setKuduMasters(this.kuduMasters) + .setTableName(this.tableName) + .setWriteMode(writeMode) + .setWorkerCount(this.workerCount) + .setDefaultOperationTimeoutMs(this.defaultOperationTimeoutMs) + .setDefaultSocketReadTimeoutMs(this.defaultSocketReadTimeoutMs) + .setFieldNames(this.fieldNames) + .setFieldTypes(this.fieldTypes); + KuduOutputFormat kuduOutputFormat = builder.finish(); + RichSinkFunction richSinkFunction = new OutputFormatSinkFunction(kuduOutputFormat); + dataStream.addSink(richSinkFunction); + } + + @Override + public TableSink> configure(String[] fieldNames, TypeInformation[] fieldTypes) { + this.fieldNames = fieldNames; + this.fieldTypes = fieldTypes; + return this; + } + + + @Override + public TupleTypeInfo> getOutputType() { + return new TupleTypeInfo(org.apache.flink.table.api.Types.BOOLEAN(), getRecordType()); + } + + @Override + public TypeInformation getRecordType() { + return new RowTypeInfo(fieldTypes, fieldNames); + } + + + @Override + public String[] getFieldNames() { + return fieldNames; + } + + @Override + public TypeInformation[] getFieldTypes() { + return fieldTypes; + } + + +} diff --git a/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/table/KuduSinkParser.java b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/table/KuduSinkParser.java new file mode 100644 index 000000000..f304c14b5 --- /dev/null +++ b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/table/KuduSinkParser.java @@ -0,0 +1,54 @@ +package com.dtstack.flink.sql.sink.kudu.table; + +import com.dtstack.flink.sql.sink.kudu.KuduOutputFormat; +import com.dtstack.flink.sql.table.AbsTableParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; + +import java.util.Map; + +import 
static com.dtstack.flink.sql.table.TableInfo.PARALLELISM_KEY; + +public class KuduSinkParser extends AbsTableParser { + + public static final String KUDU_MASTERS = "kuduMasters"; + + public static final String TABLE_NAME = "tableName"; + + public static final String WRITE_MODE = "writeMode"; + + public static final String WORKER_COUNT = "workerCount"; + + public static final String OPERATION_TIMEOUT_MS = "defaultOperationTimeoutMs"; + + public static final String SOCKET_READ_TIMEOUT_MS = "defaultSocketReadTimeoutMs"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + KuduTableInfo kuduTableInfo = new KuduTableInfo(); + kuduTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, kuduTableInfo); + + kuduTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(PARALLELISM_KEY.toLowerCase()))); + kuduTableInfo.setKuduMasters(MathUtil.getString(props.get(KUDU_MASTERS.toLowerCase()))); + kuduTableInfo.setTableName(MathUtil.getString(props.get(TABLE_NAME.toLowerCase()))); + kuduTableInfo.setWriteMode(transWriteMode(MathUtil.getString(props.get(WRITE_MODE.toLowerCase())))); + kuduTableInfo.setWorkerCount(MathUtil.getIntegerVal(props.get(WORKER_COUNT.toLowerCase()))); + kuduTableInfo.setDefaultOperationTimeoutMs(MathUtil.getIntegerVal(props.get(OPERATION_TIMEOUT_MS.toLowerCase()))); + kuduTableInfo.setDefaultSocketReadTimeoutMs(MathUtil.getIntegerVal(props.get(SOCKET_READ_TIMEOUT_MS.toLowerCase()))); + return kuduTableInfo; + } + + private KuduOutputFormat.WriteMode transWriteMode(String writeMode) { + switch (writeMode) { + case "insert": + return KuduOutputFormat.WriteMode.INSERT; + case "update": + return KuduOutputFormat.WriteMode.UPDATE; + case "upsert": + return KuduOutputFormat.WriteMode.UPSERT; + default: + return KuduOutputFormat.WriteMode.UPSERT; + } + } +} diff --git a/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/table/KuduTableInfo.java 
b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/table/KuduTableInfo.java new file mode 100644 index 000000000..ed0b956a7 --- /dev/null +++ b/kudu/kudu-sink/src/main/java/com/dtstack/flink/sql/sink/kudu/table/KuduTableInfo.java @@ -0,0 +1,92 @@ +package com.dtstack.flink.sql.sink.kudu.table; + +import com.dtstack.flink.sql.sink.kudu.KuduOutputFormat; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +public class KuduTableInfo extends TargetTableInfo { + + private static final String CURR_TYPE = "kudu"; + + private String kuduMasters; + + private String tableName; + + private KuduOutputFormat.WriteMode writeMode; + + +// private KuduOutputFormat.Consistency consistency = KuduOutputFormat.Consistency.STRONG; + + + private Integer workerCount; + + private Integer defaultOperationTimeoutMs; + + private Integer defaultSocketReadTimeoutMs; + + public KuduTableInfo() { + setType(CURR_TYPE); + } + + + public String getKuduMasters() { + return kuduMasters; + } + + public void setKuduMasters(String kuduMasters) { + this.kuduMasters = kuduMasters; + } + + public String getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public KuduOutputFormat.WriteMode getWriteMode() { + return writeMode; + } + + public void setWriteMode(KuduOutputFormat.WriteMode writeMode) { + this.writeMode = writeMode; + } + + public Integer getWorkerCount() { + return workerCount; + } + + public void setWorkerCount(Integer workerCount) { + this.workerCount = workerCount; + } + + public Integer getDefaultOperationTimeoutMs() { + return defaultOperationTimeoutMs; + } + + public void setDefaultOperationTimeoutMs(Integer defaultOperationTimeoutMs) { + this.defaultOperationTimeoutMs = defaultOperationTimeoutMs; + } + + public Integer getDefaultSocketReadTimeoutMs() { + return defaultSocketReadTimeoutMs; + } + + public void 
setDefaultSocketReadTimeoutMs(Integer defaultSocketReadTimeoutMs) { + this.defaultSocketReadTimeoutMs = defaultSocketReadTimeoutMs; + } + + @Override + public boolean check() { + Preconditions.checkNotNull(kuduMasters, "Cassandra field of kuduMasters is required"); + Preconditions.checkNotNull(tableName, "Cassandra field of tableName is required"); + return true; + } + + @Override + public String getType() { + // return super.getType().toLowerCase() + TARGET_SUFFIX; + return super.getType().toLowerCase(); + } +} diff --git a/kudu/pom.xml b/kudu/pom.xml new file mode 100644 index 000000000..e29c233c1 --- /dev/null +++ b/kudu/pom.xml @@ -0,0 +1,45 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.kudu + pom + + kudu-sink + kudu-side + + + + + + junit + junit + 3.8.1 + test + + + + com.dtstack.flink + sql.core + 1.0-SNAPSHOT + provided + + + + org.apache.kudu + kudu-client + 1.9.0 + + + + + + \ No newline at end of file diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java b/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java index ce48b388c..0edd01434 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java +++ b/launcher/src/main/java/com/dtstack/flink/sql/launcher/LauncherMain.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.launcher; -import avro.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import com.alibaba.fastjson.JSON; import com.alibaba.fastjson.TypeReference; import com.dtstack.flink.sql.enums.ClusterMode; diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobClusterClientBuilder.java b/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobClusterClientBuilder.java index 9157d219e..5dc0971bc 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobClusterClientBuilder.java +++ 
b/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobClusterClientBuilder.java @@ -18,10 +18,14 @@ package com.dtstack.flink.sql.launcher.perjob; +import com.dtstack.flink.sql.enums.EPluginLoadMode; import com.dtstack.flink.sql.launcher.YarnConfLoader; +import com.dtstack.flink.sql.option.Options; import org.apache.commons.lang3.StringUtils; +import org.apache.flink.api.common.cache.DistributedCache; import org.apache.flink.configuration.Configuration; -import org.apache.flink.hadoop.shaded.com.google.common.base.Strings; +import com.google.common.base.Strings; +import org.apache.flink.runtime.jobgraph.JobGraph; import org.apache.flink.yarn.AbstractYarnClusterDescriptor; import org.apache.flink.yarn.YarnClusterDescriptor; import org.apache.hadoop.fs.Path; @@ -30,8 +34,10 @@ import java.io.File; import java.net.MalformedURLException; +import java.net.URL; import java.util.ArrayList; import java.util.List; +import java.util.Map; import java.util.Properties; /** @@ -42,7 +48,6 @@ */ public class PerJobClusterClientBuilder { - private YarnClient yarnClient; private YarnConfiguration yarnConf; @@ -60,43 +65,71 @@ public void init(String yarnConfDir){ System.out.println("----init yarn success ----"); } - public AbstractYarnClusterDescriptor createPerJobClusterDescriptor(Properties confProp, String flinkJarPath, String queue) throws MalformedURLException { + public AbstractYarnClusterDescriptor createPerJobClusterDescriptor(Properties confProp, String flinkJarPath, Options launcherOptions, JobGraph jobGraph) throws MalformedURLException { Configuration newConf = new Configuration(); - confProp.forEach((key, val) -> newConf.setString(key.toString(), val.toString()) ); + confProp.forEach((key, val) -> newConf.setString(key.toString(), val.toString())); AbstractYarnClusterDescriptor clusterDescriptor = getClusterDescriptor(newConf, yarnConf, "."); if (StringUtils.isNotBlank(flinkJarPath)) { - if (!new File(flinkJarPath).exists()) { throw new 
RuntimeException("The Flink jar path is not exist"); } - } - List shipFiles = new ArrayList<>(); + List shipFiles = new ArrayList<>(); if (flinkJarPath != null) { File[] jars = new File(flinkJarPath).listFiles(); - - for (File file : jars){ - if (file.toURI().toURL().toString().contains("flink-dist")){ + for (File file : jars) { + if (file.toURI().toURL().toString().contains("flink-dist")) { clusterDescriptor.setLocalJarPath(new Path(file.toURI().toURL().toString())); } else { shipFiles.add(file); } } - } else { throw new RuntimeException("The Flink jar path is null"); } - clusterDescriptor.addShipFiles(shipFiles); + // classpath , all node need contain plugin jar + String pluginLoadMode = launcherOptions.getPluginLoadMode(); + if (StringUtils.equalsIgnoreCase(pluginLoadMode, EPluginLoadMode.CLASSPATH.name())) { + fillJobGraphClassPath(jobGraph); + } else if (StringUtils.equalsIgnoreCase(pluginLoadMode, EPluginLoadMode.SHIPFILE.name())) { + List pluginPaths = getPluginPathToShipFiles(jobGraph); + shipFiles.addAll(pluginPaths); + } else { + throw new IllegalArgumentException("Unsupported plugin loading mode " + pluginLoadMode + + " Currently only classpath and shipfile are supported."); + } - if(!Strings.isNullOrEmpty(queue)){ + clusterDescriptor.addShipFiles(shipFiles); + String queue = launcherOptions.getQueue(); + if (!Strings.isNullOrEmpty(queue)) { clusterDescriptor.setQueue(queue); } return clusterDescriptor; } + private static void fillJobGraphClassPath(JobGraph jobGraph) throws MalformedURLException { + Map jobCacheFileConfig = jobGraph.getUserArtifacts(); + for(Map.Entry tmp : jobCacheFileConfig.entrySet()){ + if(tmp.getKey().startsWith("class_path")){ + jobGraph.getClasspaths().add(new URL("file:" + tmp.getValue().filePath)); + } + } + } + + private List getPluginPathToShipFiles(JobGraph jobGraph) { + List shipFiles = new ArrayList<>(); + Map jobCacheFileConfig = jobGraph.getUserArtifacts(); + for(Map.Entry tmp : jobCacheFileConfig.entrySet()){ + 
if(tmp.getKey().startsWith("class_path")){ + shipFiles.add(new File(tmp.getValue().filePath)); + } + } + return shipFiles; + } + private AbstractYarnClusterDescriptor getClusterDescriptor( Configuration configuration, YarnConfiguration yarnConfiguration, diff --git a/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java b/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java index ccc696a54..55b55be2f 100644 --- a/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java +++ b/launcher/src/main/java/com/dtstack/flink/sql/launcher/perjob/PerJobSubmitter.java @@ -22,7 +22,6 @@ import com.dtstack.flink.sql.util.PluginUtil; import org.apache.commons.io.Charsets; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.api.common.cache.DistributedCache; import org.apache.flink.client.deployment.ClusterSpecification; import org.apache.flink.client.program.ClusterClient; import org.apache.flink.core.fs.Path; @@ -31,13 +30,9 @@ import org.apache.hadoop.yarn.api.records.ApplicationId; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - -import java.net.MalformedURLException; -import java.net.URL; import java.net.URLDecoder; import java.util.Arrays; import java.util.List; -import java.util.Map; import java.util.Properties; /** @@ -52,8 +47,6 @@ public class PerJobSubmitter { private static final Logger LOG = LoggerFactory.getLogger(PerJobSubmitter.class); public static String submit(Options launcherOptions, JobGraph jobGraph) throws Exception { - - fillJobGraphClassPath(jobGraph); if (!StringUtils.isBlank(launcherOptions.getAddjar())) { String addjarPath = URLDecoder.decode(launcherOptions.getAddjar(), Charsets.UTF_8.toString()); List paths = getJarPaths(addjarPath); @@ -62,8 +55,6 @@ public static String submit(Options launcherOptions, JobGraph jobGraph) throws E }); } - - String confProp = launcherOptions.getConfProp(); confProp = URLDecoder.decode(confProp, 
Charsets.UTF_8.toString()); Properties confProperties = PluginUtil.jsonStrToObject(confProp, Properties.class); @@ -74,7 +65,7 @@ public static String submit(Options launcherOptions, JobGraph jobGraph) throws E String flinkJarPath = launcherOptions.getFlinkJarPath(); - AbstractYarnClusterDescriptor yarnClusterDescriptor = perJobClusterClientBuilder.createPerJobClusterDescriptor(confProperties, flinkJarPath, launcherOptions.getQueue()); + AbstractYarnClusterDescriptor yarnClusterDescriptor = perJobClusterClientBuilder.createPerJobClusterDescriptor(confProperties, flinkJarPath, launcherOptions, jobGraph); ClusterClient clusterClient = yarnClusterDescriptor.deployJobCluster(clusterSpecification, jobGraph,true); String applicationId = clusterClient.getClusterId().toString(); @@ -95,12 +86,4 @@ private static List getJarPaths(String addjarPath) { return paths; } - private static void fillJobGraphClassPath(JobGraph jobGraph) throws MalformedURLException { - Map jobCacheFileConfig = jobGraph.getUserArtifacts(); - for(Map.Entry tmp : jobCacheFileConfig.entrySet()){ - if(tmp.getKey().startsWith("class_path")){ - jobGraph.getClasspaths().add(new URL("file:" + tmp.getValue().filePath)); - } - } - } } diff --git a/launcher/src/main/test/java/com/dtstack/flink/sql/launcher/PluginLoadModeTest.java b/launcher/src/main/test/java/com/dtstack/flink/sql/launcher/PluginLoadModeTest.java new file mode 100644 index 000000000..0a153d018 --- /dev/null +++ b/launcher/src/main/test/java/com/dtstack/flink/sql/launcher/PluginLoadModeTest.java @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.launcher; + + +/** + * yarnPer提交任务时指定pluginLoadMode + * Date: 2019/11/6 + * Company: www.dtstack.com + * @author maqi + */ +public class PluginLoadModeTest { + public static void testShipfileMode() throws Exception { + String[] sql = new String[]{"--mode", "yarnPer", "-sql", "/Users/maqi/tmp/json/group_tmp4.txt", "-name", "PluginLoadModeTest", + "-localSqlPluginPath", "/Users/maqi/code/dtstack/dt-center-flinkStreamSQL/plugins", + "-remoteSqlPluginPath", "/Users/maqi/code/dtstack/dt-center-flinkStreamSQL/plugins", + "-allowNonRestoredState", "false", "-flinkconf", "/Users/maqi/tmp/flink-1.8.1/conf", + "-confProp", "{\"sql.checkpoint.cleanup.mode\":\"false\",\"sql.checkpoint.interval\":10000,\"time.characteristic\":\"EventTime\"}", + "-yarnconf", "/Users/maqi/tmp/hadoop", "-flinkJarPath", "/Users/maqi/tmp/flink-1.8.1/lib", "-queue", "c", "-pluginLoadMode", "shipfile"}; + System.setProperty("HADOOP_USER_NAME", "admin"); + LauncherMain.main(sql); + } + + public static void testClasspathMode() throws Exception { + String[] sql = new String[]{"--mode", "yarnPer", "-sql", "/Users/maqi/tmp/json/group_tmp4.txt", "-name", "PluginLoadModeTest", + "-localSqlPluginPath", "/Users/maqi/code/dtstack/dt-center-flinkStreamSQL/plugins", + "-remoteSqlPluginPath", "/opt/dtstack/180_flinkplugin/sqlplugin", + "-allowNonRestoredState", "false", "-flinkconf", "/Users/maqi/tmp/flink-1.8.1/conf", + "-confProp", "{\"sql.checkpoint.cleanup.mode\":\"false\",\"sql.checkpoint.interval\":10000,\"time.characteristic\":\"EventTime\"}", + 
"-yarnconf", "/Users/maqi/tmp/hadoop", "-flinkJarPath", "/Users/maqi/tmp/flink-1.8.1/lib", "-queue", "c", "-pluginLoadMode", "classpath"}; + System.setProperty("HADOOP_USER_NAME", "admin"); + LauncherMain.main(sql); + } + + public static void main(String[] args) throws Exception { + testShipfileMode(); +// testClasspathMode(); + } +} diff --git a/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java b/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java index d1a38ca5f..4dc7c26b4 100644 --- a/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java +++ b/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllReqRow.java @@ -36,8 +36,8 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; import org.apache.flink.util.Collector; diff --git a/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllSideInfo.java b/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllSideInfo.java index b3ef1f7e7..6191114b5 100644 --- a/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllSideInfo.java +++ b/mongo/mongo-side/mongo-all-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAllSideInfo.java @@ -24,12 +24,10 @@ import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.mongo.table.MongoSideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import 
org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.commons.collections.CollectionUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java b/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java index 226885ed7..150053246 100644 --- a/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java +++ b/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncReqRow.java @@ -40,7 +40,7 @@ import com.mongodb.connection.ConnectionPoolSettings; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; diff --git a/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncSideInfo.java b/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncSideInfo.java index 1590912a1..365026972 100644 --- a/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncSideInfo.java +++ b/mongo/mongo-side/mongo-async-side/src/main/java/com/dtstack/flink/sql/side/mongo/MongoAsyncSideInfo.java @@ -29,7 +29,7 @@ import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import 
org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/mongo/mongo-side/mongo-side-core/src/main/java/com/dtstack/flink/sql/side/mongo/table/MongoSideTableInfo.java b/mongo/mongo-side/mongo-side-core/src/main/java/com/dtstack/flink/sql/side/mongo/table/MongoSideTableInfo.java index 721960003..a5c834469 100644 --- a/mongo/mongo-side/mongo-side-core/src/main/java/com/dtstack/flink/sql/side/mongo/table/MongoSideTableInfo.java +++ b/mongo/mongo-side/mongo-side-core/src/main/java/com/dtstack/flink/sql/side/mongo/table/MongoSideTableInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.side.mongo.table; import com.dtstack.flink.sql.side.SideTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/mongo/mongo-sink/src/main/java/com/dtstack/flink/sql/sink/mongo/table/MongoTableInfo.java b/mongo/mongo-sink/src/main/java/com/dtstack/flink/sql/sink/mongo/table/MongoTableInfo.java index 91de9eba6..02a96d6bb 100644 --- a/mongo/mongo-sink/src/main/java/com/dtstack/flink/sql/sink/mongo/table/MongoTableInfo.java +++ b/mongo/mongo-sink/src/main/java/com/dtstack/flink/sql/sink/mongo/table/MongoTableInfo.java @@ -20,7 +20,7 @@ package com.dtstack.flink.sql.sink.mongo.table; import com.dtstack.flink.sql.table.TargetTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/mysql/mysql-side/mysql-all-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAllReqRow.java b/mysql/mysql-side/mysql-all-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAllReqRow.java index a60f5774e..b6b7f45cb 100644 --- a/mysql/mysql-side/mysql-all-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAllReqRow.java +++ 
b/mysql/mysql-side/mysql-all-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAllReqRow.java @@ -24,7 +24,7 @@ import com.dtstack.flink.sql.side.rdb.all.RdbAllReqRow; import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/mysql/mysql-side/mysql-async-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAsyncReqRow.java b/mysql/mysql-side/mysql-async-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAsyncReqRow.java index 51f311276..6120767ef 100644 --- a/mysql/mysql-side/mysql-async-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAsyncReqRow.java +++ b/mysql/mysql-side/mysql-async-side/src/main/java/com/dtstack/flink/sql/side/mysql/MysqlAsyncReqRow.java @@ -69,11 +69,12 @@ public void open(Configuration parameters) throws Exception { .put("idle_connection_test_period", DEFAULT_IDLE_CONNECTION_TEST_PEROID) .put("test_connection_on_checkin", DEFAULT_TEST_CONNECTION_ON_CHECKIN); - + System.setProperty("vertx.disableFileCPResolving", "true"); VertxOptions vo = new VertxOptions(); vo.setEventLoopPoolSize(DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE); vo.setWorkerPoolSize(DEFAULT_VERTX_WORKER_POOL_SIZE); + vo.setFileResolverCachingEnabled(false); Vertx vertx = Vertx.vertx(vo); setRdbSQLClient(JDBCClient.createNonShared(vertx, mysqlClientConfig)); } diff --git a/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java b/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java index d045729e8..9ba6736be 100644 --- a/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java +++ b/mysql/mysql-sink/src/main/java/com/dtstack/flink/sql/sink/mysql/MysqlSink.java @@ -23,6 +23,8 @@ import com.dtstack.flink.sql.sink.IStreamSinkGener; import 
com.dtstack.flink.sql.sink.rdb.RdbSink; import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; +import com.dtstack.flink.sql.util.DtStringUtil; + import java.util.List; import java.util.Map; @@ -46,17 +48,17 @@ public RetractJDBCOutputFormat getOutputFormat() { } @Override - public void buildSql(String tableName, List fields) { + public void buildSql(String scheam, String tableName, List fields) { buildInsertSql(tableName, fields); } @Override - public String buildUpdateSql(String tableName, List fieldNames, Map> realIndexes, List fullField) { + public String buildUpdateSql(String schema, String tableName, List fieldNames, Map> realIndexes, List fullField) { return null; } private void buildInsertSql(String tableName, List fields) { - String sqlTmp = "replace into " + tableName + " (${fields}) values (${placeholder})"; + String sqlTmp = "replace into " + tableName + " (${fields}) values (${placeholder})"; String fieldsStr = ""; String placeholder = ""; diff --git a/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java index 278e5e665..18d9ba045 100644 --- a/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java +++ b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllReqRow.java @@ -24,7 +24,7 @@ import com.dtstack.flink.sql.side.rdb.all.RdbAllReqRow; import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; diff --git a/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java 
b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java index 801e0a69b..914a57aef 100644 --- a/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java +++ b/oracle/oracle-side/oracle-all-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAllSideInfo.java @@ -22,6 +22,7 @@ import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.rdb.all.RdbAllSideInfo; import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; +import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; @@ -40,15 +41,11 @@ public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) { sqlCondition = "select ${selectField} from ${tableName} "; - sqlCondition = sqlCondition.replace("${tableName}", dealFiled(rdbSideTableInfo.getTableName())).replace("${selectField}", dealLowerSelectFiled(sideSelectFields)); + sqlCondition = sqlCondition.replace("${tableName}", DtStringUtil.getTableFullPath(rdbSideTableInfo.getSchema(), rdbSideTableInfo.getTableName())).replace("${selectField}", dealLowerSelectFiled(sideSelectFields)); System.out.println("---------side_exe_sql-----\n" + sqlCondition); } - private String dealFiled(String field) { - return "\"" + field + "\""; - } - private String dealLowerSelectFiled(String fieldsStr) { StringBuilder sb = new StringBuilder(); String[] fields = fieldsStr.split(","); diff --git a/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java index e1d2fc69e..f973ffee2 100644 --- a/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java +++ 
b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncReqRow.java @@ -59,11 +59,14 @@ public void open(Configuration parameters) throws Exception { .put("provider_class", DT_PROVIDER_CLASS) .put("preferred_test_query", PREFERRED_TEST_QUERY_SQL) .put("idle_connection_test_period", DEFAULT_IDLE_CONNECTION_TEST_PEROID) - .put("test_connection_on_checkin", DEFAULT_TEST_CONNECTION_ON_CHECKIN);; + .put("test_connection_on_checkin", DEFAULT_TEST_CONNECTION_ON_CHECKIN); + + System.setProperty("vertx.disableFileCPResolving", "true"); VertxOptions vo = new VertxOptions(); vo.setEventLoopPoolSize(DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE); vo.setWorkerPoolSize(DEFAULT_VERTX_WORKER_POOL_SIZE); + vo.setFileResolverCachingEnabled(false); Vertx vertx = Vertx.vertx(vo); setRdbSQLClient(JDBCClient.createNonShared(vertx, oracleClientConfig)); } diff --git a/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java index 2c87199f4..c9d2f98d9 100644 --- a/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java +++ b/oracle/oracle-side/oracle-async-side/src/main/java/com/dtstack/flink/sql/side/oracle/OracleAsyncSideInfo.java @@ -23,14 +23,12 @@ import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.rdb.async.RdbAsyncSideInfo; import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; +import com.dtstack.flink.sql.util.DtStringUtil; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; -import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; -import java.util.Arrays; import 
java.util.List; @@ -68,10 +66,12 @@ public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) { } } - sqlCondition = sqlCondition.replace("${tableName}", dealLowerFiled(rdbSideTableInfo.getTableName())).replace("${selectField}", dealLowerSelectFiled(sideSelectFields)); + sqlCondition = sqlCondition.replace("${tableName}", DtStringUtil.getTableFullPath(rdbSideTableInfo.getSchema(), rdbSideTableInfo.getTableName())).replace("${selectField}", dealLowerSelectFiled(sideSelectFields)); System.out.println("---------side_exe_sql-----\n" + sqlCondition); } + + private String dealLowerFiled(String field) { return "\"" + field + "\""; } diff --git a/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java index d8669b11f..c30dc9c60 100644 --- a/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java +++ b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/OracleSink.java @@ -21,8 +21,9 @@ import com.dtstack.flink.sql.sink.rdb.RdbSink; import com.dtstack.flink.sql.sink.rdb.format.ExtendOutputFormat; import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; +import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.ArrayList; import java.util.Iterator; @@ -50,17 +51,18 @@ public RetractJDBCOutputFormat getOutputFormat() { } @Override - public void buildSql(String tableName, List fields) { - buildInsertSql(tableName, fields); + public void buildSql(String scheam, String tableName, List fields) { + buildInsertSql(scheam, tableName, fields); } - private void buildInsertSql(String tableName, List fields) { + private void buildInsertSql(String scheam, String tableName, List fields) { + + tableName = 
DtStringUtil.getTableFullPath(scheam,tableName); - tableName = quoteTable(tableName); String sqlTmp = "insert into " + tableName + " (${fields}) values (${placeholder})"; List adaptFields = Lists.newArrayList(); - fields.forEach(field -> adaptFields.add(quoteColumn(field))); + fields.forEach(field -> adaptFields.add(DtStringUtil.addQuoteForStr(field))); String fieldsStr = StringUtils.join(adaptFields, ","); String placeholder = ""; @@ -73,15 +75,37 @@ private void buildInsertSql(String tableName, List fields) { this.sql = sqlTmp; } + /** + * use MERGE INTO build oracle replace into sql + * @param tableName + * @param fieldNames create table contained column columns + * @param realIndexes + * @param fullField real columns , query from db + * @return + */ @Override - public String buildUpdateSql(String tableName, List fieldNames, Map> realIndexes, List fullField) { - tableName = quoteTable(tableName); - return "MERGE INTO " + tableName + " T1 USING " + public String buildUpdateSql(String scheam, String tableName, List fieldNames, Map> realIndexes, List fullField) { + tableName = DtStringUtil.getTableFullPath(scheam, tableName); + + StringBuilder sb = new StringBuilder(); + + sb.append("MERGE INTO " + tableName + " T1 USING " + "(" + makeValues(fieldNames) + ") T2 ON (" - + updateKeySql(realIndexes) + ") WHEN MATCHED THEN UPDATE SET " - + getUpdateSql(fieldNames, fullField, "T1", "T2", keyColList(realIndexes)) + " WHEN NOT MATCHED THEN " + + updateKeySql(realIndexes) + ") "); + + + String updateSql = getUpdateSql(fieldNames, fullField, "T1", "T2", keyColList(realIndexes)); + + if (StringUtils.isNotEmpty(updateSql)) { + sb.append(" WHEN MATCHED THEN UPDATE SET "); + sb.append(updateSql); + } + + sb.append(" WHEN NOT MATCHED THEN " + "INSERT (" + quoteColumns(fieldNames) + ") VALUES (" - + quoteColumns(fieldNames, "T2") + ")"; + + quoteColumns(fieldNames, "T2") + ")"); + + return sb.toString(); } @@ -90,17 +114,22 @@ public String quoteColumns(List column) { } public 
String quoteColumns(List column, String table) { - String prefix = StringUtils.isBlank(table) ? "" : quoteTable(table) + "."; + String prefix = StringUtils.isBlank(table) ? "" : DtStringUtil.addQuoteForStr(table) + "."; List list = new ArrayList<>(); for (String col : column) { - list.add(prefix + quoteColumn(col)); + list.add(prefix + DtStringUtil.addQuoteForStr(col)); } return StringUtils.join(list, ","); } - protected List keyColList(Map> updateKey) { + /** + * extract all distinct index column + * @param realIndexes + * @return + */ + protected List keyColList(Map> realIndexes) { List keyCols = new ArrayList<>(); - for (Map.Entry> entry : updateKey.entrySet()) { + for (Map.Entry> entry : realIndexes.entrySet()) { List list = entry.getValue(); for (String col : list) { if (!containsIgnoreCase(keyCols,col)) { @@ -111,56 +140,64 @@ protected List keyColList(Map> updateKey) { return keyCols; } - public String getUpdateSql(List column, List fullColumn, String leftTable, String rightTable, List keyCols) { - String prefixLeft = StringUtils.isBlank(leftTable) ? "" : quoteTable(leftTable) + "."; - String prefixRight = StringUtils.isBlank(rightTable) ? "" : quoteTable(rightTable) + "."; + /** + * build update sql , such as UPDATE SET "T1".A="T2".A + * @param updateColumn create table contained column columns + * @param fullColumn real columns , query from db + * @param leftTable alias + * @param rightTable alias + * @param indexCols index column + * @return + */ + public String getUpdateSql(List updateColumn, List fullColumn, String leftTable, String rightTable, List indexCols) { + String prefixLeft = StringUtils.isBlank(leftTable) ? "" : DtStringUtil.addQuoteForStr(leftTable) + "."; + String prefixRight = StringUtils.isBlank(rightTable) ? 
"" : DtStringUtil.addQuoteForStr(rightTable) + "."; List list = new ArrayList<>(); for (String col : fullColumn) { - if (keyCols == null || keyCols.size() == 0 || containsIgnoreCase(keyCols,col)) { + // filter index column + if (indexCols == null || indexCols.size() == 0 || containsIgnoreCase(indexCols,col)) { continue; } - if (fullColumn == null ||containsIgnoreCase(column,col)) { - list.add(prefixLeft + col + "=" + prefixRight + col); + if (containsIgnoreCase(updateColumn,col)) { + list.add(prefixLeft + DtStringUtil.addQuoteForStr(col) + "=" + prefixRight + DtStringUtil.addQuoteForStr(col)); } else { - list.add(prefixLeft + col + "=null"); + list.add(prefixLeft + DtStringUtil.addQuoteForStr(col) + "=null"); } } return StringUtils.join(list, ","); } - public String quoteTable(String table) { - String[] parts = table.split("\\."); - StringBuilder sb = new StringBuilder(); - for (int i = 0; i < parts.length; ++i) { - if (i != 0) { - sb.append("."); - } - sb.append(getStartQuote() + parts[i] + getEndQuote()); - } - return sb.toString(); - } - + /** + * build connect sql by index column, such as T1."A"=T2."A" + * @param updateKey + * @return + */ public String updateKeySql(Map> updateKey) { List exprList = new ArrayList<>(); for (Map.Entry> entry : updateKey.entrySet()) { List colList = new ArrayList<>(); for (String col : entry.getValue()) { - colList.add("T1." + quoteColumn(col) + "=T2." + quoteColumn(col)); + colList.add("T1." + DtStringUtil.addQuoteForStr(col) + "=T2." + DtStringUtil.addQuoteForStr(col)); } exprList.add(StringUtils.join(colList, " AND ")); } return StringUtils.join(exprList, " OR "); } - + /** + * build select sql , such as (SELECT ? "A",? "B" FROM DUAL) + * + * @param column destination column + * @return + */ public String makeValues(List column) { StringBuilder sb = new StringBuilder("SELECT "); for (int i = 0; i < column.size(); ++i) { if (i != 0) { sb.append(","); } - sb.append("? " + quoteColumn(column.get(i))); + sb.append("? 
" + DtStringUtil.addQuoteForStr(column.get(i))); } sb.append(" FROM DUAL"); return sb.toString(); @@ -175,17 +212,6 @@ public boolean containsIgnoreCase(List l, String s) { return false; } - public String quoteColumn(String column) { - return getStartQuote() + column + getEndQuote(); - } - - public String getStartQuote() { - return "\""; - } - - public String getEndQuote() { - return "\""; - } } diff --git a/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java index 6db2c9c06..aff096bd3 100644 --- a/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java +++ b/oracle/oracle-sink/src/main/java/com/dtstack/flink/sql/sink/oracle/table/OracleSinkParser.java @@ -34,8 +34,8 @@ public class OracleSinkParser extends RdbSinkParser { @Override public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { - TableInfo sqlserverTableInfo = super.getTableInfo(tableName, fieldsInfo, props); - sqlserverTableInfo.setType(CURR_TYPE); - return sqlserverTableInfo; + TableInfo oracleTableInfo = super.getTableInfo(tableName, fieldsInfo, props); + oracleTableInfo.setType(CURR_TYPE); + return oracleTableInfo; } } diff --git a/pom.xml b/pom.xml index cf203002a..b4c3478b3 100644 --- a/pom.xml +++ b/pom.xml @@ -13,6 +13,7 @@ kafka09 kafka10 kafka11 + kafka mysql hbase elasticsearch5 @@ -23,9 +24,12 @@ sqlserver oracle cassandra + kudu + postgresql serversocket console + clickhouse diff --git a/postgresql/pom.xml b/postgresql/pom.xml new file mode 100644 index 000000000..53398b87b --- /dev/null +++ b/postgresql/pom.xml @@ -0,0 +1,44 @@ + + + + flink.sql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + pom + sql.postgresql + + + postgresql-side + postgresql-sink + + + 1.0-SNAPSHOT + 42.2.2 + + + + + junit + junit + 3.8.1 + test + + + + com.dtstack.flink + sql.core + ${sql.core.version} + 
provided + + + org.postgresql + postgresql + ${postgresql.version} + + + diff --git a/postgresql/postgresql-side/pom.xml b/postgresql/postgresql-side/pom.xml new file mode 100644 index 000000000..0e3723ea6 --- /dev/null +++ b/postgresql/postgresql-side/pom.xml @@ -0,0 +1,38 @@ + + + + sql.postgresql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.postgresql + 1.0-SNAPSHOT + postgresql-side + pom + + + postgresql-side-core + postgresql-async-side + postgresql-all-side + + + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.rdb + ${rdb.side.version} + + + + + diff --git a/postgresql/postgresql-side/postgresql-all-side/pom.xml b/postgresql/postgresql-side/postgresql-all-side/pom.xml new file mode 100644 index 000000000..d076d9300 --- /dev/null +++ b/postgresql/postgresql-side/postgresql-all-side/pom.xml @@ -0,0 +1,92 @@ + + + + sql.side.postgresql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.all.postgresql + postgresql-all-side + + jar + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.postgresql.core + ${sql.side.postgresql.core.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + + diff --git a/postgresql/postgresql-side/postgresql-all-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAllReqRow.java b/postgresql/postgresql-side/postgresql-all-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAllReqRow.java new file mode 100644 index 000000000..6d68cfdca --- /dev/null +++ b/postgresql/postgresql-side/postgresql-all-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAllReqRow.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.side.postgresql; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.all.RdbAllReqRow; +import com.dtstack.flink.sql.util.DtStringUtil; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.util.List; +import java.util.Map; + +/** + * side operator with cache for all(period reload) + * Date: 2019-08-11 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlAllReqRow extends RdbAllReqRow { + + private static final long serialVersionUID = 2098635140857937717L; + + private static final Logger LOG = LoggerFactory.getLogger(PostgresqlAllReqRow.class); + + private static final String POSTGRESQL_DRIVER = "org.postgresql.Driver"; + + public PostgresqlAllReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(new PostgresqlAllSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + } + + @Override + public Connection getConn(String 
dbURL, String userName, String password) { + try { + Class.forName(POSTGRESQL_DRIVER); + //add param useCursorFetch=true + Map addParams = Maps.newHashMap(); + addParams.put("useCursorFetch", "true"); + String targetDbUrl = DtStringUtil.addJdbcParam(dbURL, addParams, true); + return DriverManager.getConnection(targetDbUrl, userName, password); + } catch (Exception e) { + LOG.error("", e); + throw new RuntimeException("", e); + } + } +} diff --git a/postgresql/postgresql-side/postgresql-all-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAllSideInfo.java b/postgresql/postgresql-side/postgresql-all-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAllSideInfo.java new file mode 100644 index 000000000..d383ee46d --- /dev/null +++ b/postgresql/postgresql-side/postgresql-all-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAllSideInfo.java @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.dtstack.flink.sql.side.postgresql; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.all.RdbAllSideInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; + +import java.util.List; + +/** + * Reason: + * Date: 2019-08-11 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlAllSideInfo extends RdbAllSideInfo { + public PostgresqlAllSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } +} diff --git a/postgresql/postgresql-side/postgresql-async-side/pom.xml b/postgresql/postgresql-side/postgresql-async-side/pom.xml new file mode 100644 index 000000000..13296e7f3 --- /dev/null +++ b/postgresql/postgresql-side/postgresql-async-side/pom.xml @@ -0,0 +1,92 @@ + + + + sql.side.postgresql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.async.postgresql + + postgresql-async-side + + jar + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.side.postgresql.core + ${sql.side.postgresql.core.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + diff --git a/postgresql/postgresql-side/postgresql-async-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAsyncReqRow.java b/postgresql/postgresql-side/postgresql-async-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAsyncReqRow.java new file mode 100644 index 000000000..02b333819 --- /dev/null +++ b/postgresql/postgresql-side/postgresql-async-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAsyncReqRow.java @@ -0,0 +1,74 @@ +/* + * 
Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.side.postgresql; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.async.RdbAsyncReqRow; +import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; +import io.vertx.core.Vertx; +import io.vertx.core.VertxOptions; +import io.vertx.core.json.JsonObject; +import io.vertx.ext.jdbc.JDBCClient; +import org.apache.flink.api.java.typeutils.RowTypeInfo; +import org.apache.flink.configuration.Configuration; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +/** + * Date: 2019-08-11 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlAsyncReqRow extends RdbAsyncReqRow { + + private static final Logger LOG = LoggerFactory.getLogger(PostgresqlAsyncReqRow.class); + + private final static String POSTGRESQL_DRIVER = "org.postgresql.Driver"; + + public PostgresqlAsyncReqRow(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(new PostgresqlAsyncSideInfo(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo)); + } + + 
+ @Override + public void open(Configuration parameters) throws Exception { + super.open(parameters); + JsonObject pgClientConfig = new JsonObject(); + RdbSideTableInfo rdbSideTableInfo = (RdbSideTableInfo) sideInfo.getSideTableInfo(); + pgClientConfig.put("url", rdbSideTableInfo.getUrl()) + .put("driver_class", POSTGRESQL_DRIVER) + .put("max_pool_size", DEFAULT_MAX_DB_CONN_POOL_SIZE) + .put("user", rdbSideTableInfo.getUserName()) + .put("password", rdbSideTableInfo.getPassword()); + + VertxOptions vo = new VertxOptions(); + vo.setEventLoopPoolSize(DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE); + vo.setWorkerPoolSize(DEFAULT_VERTX_WORKER_POOL_SIZE); + Vertx vertx = Vertx.vertx(vo); + setRdbSQLClient(JDBCClient.createNonShared(vertx, pgClientConfig)); + } + +} diff --git a/postgresql/postgresql-side/postgresql-async-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAsyncSideInfo.java b/postgresql/postgresql-side/postgresql-async-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAsyncSideInfo.java new file mode 100644 index 000000000..1d89f4894 --- /dev/null +++ b/postgresql/postgresql-side/postgresql-async-side/src/main/java/com/dtstack/flink/sql/side/postgresql/PostgresqlAsyncSideInfo.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package com.dtstack.flink.sql.side.postgresql; + +import com.dtstack.flink.sql.side.FieldInfo; +import com.dtstack.flink.sql.side.JoinInfo; +import com.dtstack.flink.sql.side.SideTableInfo; +import com.dtstack.flink.sql.side.rdb.async.RdbAsyncSideInfo; +import org.apache.flink.api.java.typeutils.RowTypeInfo; + +import java.util.List; + +/** + * Reason: + * Date: 2019-08-11 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlAsyncSideInfo extends RdbAsyncSideInfo { + + public PostgresqlAsyncSideInfo(RowTypeInfo rowTypeInfo, JoinInfo joinInfo, List outFieldInfoList, SideTableInfo sideTableInfo) { + super(rowTypeInfo, joinInfo, outFieldInfoList, sideTableInfo); + } +} diff --git a/postgresql/postgresql-side/postgresql-side-core/pom.xml b/postgresql/postgresql-side/postgresql-side-core/pom.xml new file mode 100644 index 000000000..6120767eb --- /dev/null +++ b/postgresql/postgresql-side/postgresql-side-core/pom.xml @@ -0,0 +1,18 @@ + + + + sql.side.postgresql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.side.postgresql.core + 1.0-SNAPSHOT + jar + postgresql-side-core + + diff --git a/postgresql/postgresql-side/postgresql-side-core/src/main/java/com/dtstack/flink/sql/side/postgresql/table/PostgresqlSideParser.java b/postgresql/postgresql-side/postgresql-side-core/src/main/java/com/dtstack/flink/sql/side/postgresql/table/PostgresqlSideParser.java new file mode 100644 index 000000000..faee2c704 --- /dev/null +++ b/postgresql/postgresql-side/postgresql-side-core/src/main/java/com/dtstack/flink/sql/side/postgresql/table/PostgresqlSideParser.java @@ -0,0 +1,45 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + + +package com.dtstack.flink.sql.side.postgresql.table; + +import com.dtstack.flink.sql.side.rdb.table.RdbSideParser; +import com.dtstack.flink.sql.table.TableInfo; + +import java.util.Map; + +/** + * Reason: + * Date: 2019-08-11 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlSideParser extends RdbSideParser { + + private static final String CURR_TYPE = "postgresql"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + TableInfo pgTableInfo = super.getTableInfo(tableName, fieldsInfo, props); + pgTableInfo.setType(CURR_TYPE); + return pgTableInfo; + } +} diff --git a/postgresql/postgresql-sink/pom.xml b/postgresql/postgresql-sink/pom.xml new file mode 100644 index 000000000..d65368358 --- /dev/null +++ b/postgresql/postgresql-sink/pom.xml @@ -0,0 +1,90 @@ + + + sql.postgresql + com.dtstack.flink + 1.0-SNAPSHOT + ../pom.xml + + 4.0.0 + + sql.sink.postgresql + jar + + postgresql-sink + http://maven.apache.org + + + 1.0-SNAPSHOT + + + + + com.dtstack.flink + sql.sink.rdb + ${sql.sink.rdb.version} + + + + + + + org.apache.maven.plugins + maven-shade-plugin + 1.4 + + + package + + shade + + + + + + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + + + + + maven-antrun-plugin + 1.2 + + + copy-resources + + package + + run + + + + + + + + + + + + + + + + + + diff --git 
a/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/PostgresqlSink.java b/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/PostgresqlSink.java new file mode 100644 index 000000000..663816ca0 --- /dev/null +++ b/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/PostgresqlSink.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.sink.postgresql; + + +import com.dtstack.flink.sql.sink.IStreamSinkGener; +import com.dtstack.flink.sql.sink.postgresql.table.PostgresqlTableInfo; +import com.dtstack.flink.sql.sink.rdb.RdbSink; +import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; +import com.dtstack.flink.sql.table.TargetTableInfo; +import org.apache.commons.lang3.StringUtils; + +import java.util.List; +import java.util.Map; + +/** + * Date: 2019-08-22 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlSink extends RdbSink implements IStreamSinkGener { + + private static final String POSTGRESQL_DRIVER = "org.postgresql.Driver"; + + private boolean isUpsert; + + private String keyField; + + public PostgresqlSink() { + } + + @Override + public RdbSink genStreamSink(TargetTableInfo targetTableInfo) { + PostgresqlTableInfo pgTableInfo = (PostgresqlTableInfo) targetTableInfo; + this.isUpsert = pgTableInfo.isUpsert(); + this.keyField = pgTableInfo.getKeyField(); + super.genStreamSink(targetTableInfo); + return this; + } + + @Override + public RetractJDBCOutputFormat getOutputFormat() { + return new RetractJDBCOutputFormat(); + } + + @Override + public void buildSql(String scheam, String tableName, List fields) { + buildInsertSql(tableName, fields); + } + + @Override + public String buildUpdateSql(String schema, String tableName, List fieldNames, Map> realIndexes, List fullField) { + return null; + } + + private void buildInsertSql(String tableName, List fields) { + StringBuffer sqlBuffer = new StringBuffer(); + + sqlBuffer.append("insert into ".concat(tableName) + .concat(" (") + .concat(StringUtils.join(fields, ",")) + .concat(") ") + ); + sqlBuffer.append("values ("); + StringBuffer upsertFields = new StringBuffer(); + for (String fieldName : fields) { + sqlBuffer.append("?,"); + if (this.isUpsert) { + if (fieldName.equals(this.keyField)) { + continue; + } + upsertFields.append(String.format("%s=excluded.%s,", fieldName, 
fieldName)); + } + } + sqlBuffer.deleteCharAt(sqlBuffer.length() - 1); + sqlBuffer.append(")"); + + if (this.isUpsert) { + upsertFields.deleteCharAt(upsertFields.length() - 1); + sqlBuffer.append(" ON conflict(".concat(keyField).concat(")")); + sqlBuffer.append(" DO UPDATE SET "); + sqlBuffer.append(upsertFields); + } + this.sql = sqlBuffer.toString(); + } + + @Override + public String getDriverName() { + return POSTGRESQL_DRIVER; + } + +} diff --git a/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/table/PostgresqlSinkParser.java b/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/table/PostgresqlSinkParser.java new file mode 100644 index 000000000..f773b5a5c --- /dev/null +++ b/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/table/PostgresqlSinkParser.java @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.sink.postgresql.table; + +import com.dtstack.flink.sql.sink.rdb.table.RdbSinkParser; +import com.dtstack.flink.sql.table.TableInfo; +import com.dtstack.flink.sql.util.MathUtil; +import org.apache.commons.lang3.StringUtils; + +import java.util.Map; + +/** + * Date: 2019-08-22 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlSinkParser extends RdbSinkParser { + private static final String CURR_TYPE = "postgresql"; + + @Override + public TableInfo getTableInfo(String tableName, String fieldsInfo, Map props) { + PostgresqlTableInfo pgTableInfo = new PostgresqlTableInfo(); + pgTableInfo.setName(tableName); + parseFieldsInfo(fieldsInfo, pgTableInfo); + + pgTableInfo.setParallelism(MathUtil.getIntegerVal(props.get(PostgresqlTableInfo.PARALLELISM_KEY.toLowerCase()))); + pgTableInfo.setUrl(MathUtil.getString(props.get(PostgresqlTableInfo.URL_KEY.toLowerCase()))); + pgTableInfo.setTableName(MathUtil.getString(props.get(PostgresqlTableInfo.TABLE_NAME_KEY.toLowerCase()))); + pgTableInfo.setUserName(MathUtil.getString(props.get(PostgresqlTableInfo.USER_NAME_KEY.toLowerCase()))); + pgTableInfo.setPassword(MathUtil.getString(props.get(PostgresqlTableInfo.PASSWORD_KEY.toLowerCase()))); + pgTableInfo.setBatchSize(MathUtil.getIntegerVal(props.get(PostgresqlTableInfo.BATCH_SIZE_KEY.toLowerCase()))); + pgTableInfo.setBatchWaitInterval(MathUtil.getLongVal(props.get(PostgresqlTableInfo.BATCH_WAIT_INTERVAL_KEY.toLowerCase()))); + pgTableInfo.setBufferSize(MathUtil.getString(props.get(PostgresqlTableInfo.BUFFER_SIZE_KEY.toLowerCase()))); + pgTableInfo.setFlushIntervalMs(MathUtil.getString(props.get(PostgresqlTableInfo.FLUSH_INTERVALMS_KEY.toLowerCase()))); + + pgTableInfo.setKeyField(MathUtil.getString(props.get(PostgresqlTableInfo.TABLE_KEY_FIELD.toLowerCase()))); + + String isUpsertStr = (String) props.get(PostgresqlTableInfo.TABLE_IS_UPSERT.toLowerCase()); + pgTableInfo.setUpsert(!StringUtils.isEmpty(isUpsertStr) && 
isUpsertStr.equals("true") ? true : false); + + pgTableInfo.check(); + return pgTableInfo; + } +} diff --git a/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/table/PostgresqlTableInfo.java b/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/table/PostgresqlTableInfo.java new file mode 100644 index 000000000..78df0de14 --- /dev/null +++ b/postgresql/postgresql-sink/src/main/java/com/dtstack/flink/sql/sink/postgresql/table/PostgresqlTableInfo.java @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + + +package com.dtstack.flink.sql.sink.postgresql.table; + +import com.dtstack.flink.sql.sink.rdb.table.RdbTableInfo; +import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; + +/** + * Date: 2019-08-22 + * Company: mmg + * + * @author tcm + */ + +public class PostgresqlTableInfo extends RdbTableInfo { + + public static final String TABLE_IS_UPSERT = "isUpsert"; + + public static final String TABLE_KEY_FIELD = "keyField"; + + private static final String CURR_TYPE = "postgresql"; + + private boolean isUpsert; + + private String keyField; + + + public PostgresqlTableInfo() { + setType(CURR_TYPE); + } + + public boolean isUpsert() { + return isUpsert; + } + + public void setUpsert(boolean upsert) { + isUpsert = upsert; + } + + public String getKeyField() { + return keyField; + } + + public void setKeyField(String keyField) { + this.keyField = keyField; + } + + @Override + public boolean check() { + Preconditions.checkNotNull(getUrl(), "postgresql field of URL is required"); + Preconditions.checkNotNull(getTableName(), "postgresql field of tableName is required"); + Preconditions.checkNotNull(getUserName(), "postgresql field of userName is required"); + Preconditions.checkNotNull(getPassword(), "postgresql field of password is required"); + if (isUpsert()) { + Preconditions.checkNotNull(getKeyField(), "postgresql field of keyField is required"); + } + return true; + } +} diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java index 26ed7648f..5fc7ba158 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllReqRow.java @@ -24,8 +24,8 @@ import com.dtstack.flink.sql.side.rdb.util.SwitchUtil; import org.apache.calcite.sql.JoinType; import org.apache.commons.collections.CollectionUtils; -import 
org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; import org.apache.flink.util.Collector; @@ -119,9 +119,12 @@ public void flatMap(Row value, Collector out) throws Exception { for (Integer conValIndex : sideInfo.getEqualValIndex()) { Object equalObj = value.getField(conValIndex); if (equalObj == null) { - out.collect(null); + if (sideInfo.getJoinType() == JoinType.LEFT) { + Row row = fillData(value, null); + out.collect(row); + } + return; } - inputParams.add(equalObj); } @@ -171,7 +174,6 @@ private void loadData(Map>> tmpCache) throws SQ try { for (int i = 0; i < CONN_RETRY_NUM; i++) { - try { connection = getConn(tableInfo.getUrl(), tableInfo.getUserName(), tableInfo.getPassword()); break; @@ -179,7 +181,6 @@ private void loadData(Map>> tmpCache) throws SQ if (i == CONN_RETRY_NUM - 1) { throw new RuntimeException("", e); } - try { String connInfo = "url:" + tableInfo.getUrl() + ";userName:" + tableInfo.getUserName() + ",pwd:" + tableInfo.getPassword(); LOG.warn("get conn fail, wait for 5 sec and try again, connInfo:" + connInfo); diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllSideInfo.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllSideInfo.java index 9ea79802d..c10b394e5 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllSideInfo.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/all/RdbAllSideInfo.java @@ -24,12 +24,10 @@ import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.side.rdb.table.RdbSideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import 
org.apache.calcite.sql.SqlNode; import org.apache.commons.collections.CollectionUtils; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java index 156f136ef..d997f7d58 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncReqRow.java @@ -26,8 +26,7 @@ import io.vertx.core.json.JsonArray; import io.vertx.ext.sql.SQLClient; import io.vertx.ext.sql.SQLConnection; -import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import org.apache.flink.streaming.api.functions.async.ResultFuture; import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; @@ -78,7 +77,7 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except for (Integer conValIndex : sideInfo.getEqualValIndex()) { Object equalObj = input.getField(conValIndex); if (equalObj == null) { - resultFuture.complete(null); + dealMissKey(input, resultFuture); return; } inputParams.add(equalObj); diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java index 3f3fb323d..0481e3443 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/async/RdbAsyncSideInfo.java @@ -28,10 +28,14 @@ import org.apache.calcite.sql.SqlIdentifier; import org.apache.calcite.sql.SqlKind; import 
org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.*; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; +import java.util.Arrays; import java.util.List; +import java.util.Map; + /** * Reason: @@ -68,17 +72,39 @@ public void buildEqualInfo(JoinInfo joinInfo, SideTableInfo sideTableInfo) { dealOneEqualCon(sqlNode, sideTableName); } + List whereConditionList = Lists.newArrayList();; + Map physicalFields = rdbSideTableInfo.getPhysicalFields(); + SqlNode whereNode = ((SqlSelect) joinInfo.getSelectNode()).getWhere(); + if (whereNode != null) { + // 解析维表中的过滤条件 + ParseUtils.parseSideWhere(whereNode, physicalFields, whereConditionList); + } + sqlCondition = "select ${selectField} from ${tableName} where "; for (int i = 0; i < equalFieldList.size(); i++) { String equalField = sideTableInfo.getPhysicalFields().getOrDefault(equalFieldList.get(i), equalFieldList.get(i)); - sqlCondition += equalField + "\t" + sqlJoinCompareOperate.get(i) + " ?"; + sqlCondition += equalField + " " + sqlJoinCompareOperate.get(i) + " ? 
"; if (i != equalFieldList.size() - 1) { sqlCondition += " and "; } } + if (0 != whereConditionList.size()) { + // 如果where条件中第一个符合条件的是维表中的条件 + String firstCondition = whereConditionList.get(0); + if (!"and".equalsIgnoreCase(firstCondition) && !"or".equalsIgnoreCase(firstCondition)) { + whereConditionList.add(0, "and ("); + } else { + whereConditionList.add(1, "("); + } + whereConditionList.add(whereConditionList.size(), ")"); + sqlCondition += String.join(" ", whereConditionList); + } sqlCondition = sqlCondition.replace("${tableName}", rdbSideTableInfo.getTableName()).replace("${selectField}", sideSelectFields); + + System.out.println("--------side sql query:-------------------"); + System.out.println(sqlCondition); } @@ -134,4 +160,4 @@ public void dealOneEqualCon(SqlNode sqlNode, String sideTableName) { } -} +} \ No newline at end of file diff --git a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java index e007cecb0..b9811b0ee 100644 --- a/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java +++ b/rdb/rdb-side/src/main/java/com/dtstack/flink/sql/side/rdb/table/RdbSideParser.java @@ -47,6 +47,8 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map, Serializab protected String tableName; + protected String registerTabName; + protected String sql; protected List primaryKeys; @@ -79,6 +81,8 @@ public abstract class RdbSink implements RetractStreamTableSink, Serializab private int parallelism = -1; + private String schema; + public RichSinkFunction createJdbcSinkFunc() { if (driverName == null || dbURL == null || userName == null || password == null || sqlTypes == null || tableName == null) { @@ -96,6 +100,7 @@ public RichSinkFunction createJdbcSinkFunc() { outputFormat.setTypesArray(sqlTypes); outputFormat.setTableName(tableName); outputFormat.setDbType(dbType); + outputFormat.setSchema(schema); 
outputFormat.setDbSink(this); outputFormat.verifyField(); @@ -112,6 +117,7 @@ public RdbSink genStreamSink(TargetTableInfo targetTableInfo) { String tmpUserName = rdbTableInfo.getUserName(); String tmpPassword = rdbTableInfo.getPassword(); String tmpTableName = rdbTableInfo.getTableName(); + String tmpRegisterName = rdbTableInfo.getName(); Integer tmpSqlBatchSize = rdbTableInfo.getBatchSize(); if (tmpSqlBatchSize != null) { @@ -136,10 +142,12 @@ public RdbSink genStreamSink(TargetTableInfo targetTableInfo) { this.userName = tmpUserName; this.password = tmpPassword; this.tableName = tmpTableName; + this.registerTabName = tmpRegisterName; this.primaryKeys = rdbTableInfo.getPrimaryKeys(); this.dbType = rdbTableInfo.getType(); + this.schema = rdbTableInfo.getSchema(); - buildSql(tableName, fields); + buildSql(schema, tableName, fields); buildSqlTypes(fieldTypeArray); return this; } @@ -193,7 +201,7 @@ protected void buildSqlTypes(List fieldTypeArray) { public void emitDataStream(DataStream> dataStream) { RichSinkFunction richSinkFunction = createJdbcSinkFunc(); DataStreamSink streamSink = dataStream.addSink(richSinkFunction); - streamSink.name(tableName); + streamSink.name(registerTabName); if (parallelism > 0) { streamSink.setParallelism(parallelism); } @@ -250,7 +258,7 @@ public void setDbType(String dbType) { * @param tableName * @param fields */ - public abstract void buildSql(String tableName, List fields); + public abstract void buildSql(String schema, String tableName, List fields); /** * sqlserver and oracle maybe implement @@ -260,7 +268,7 @@ public void setDbType(String dbType) { * @param realIndexes * @return */ - public abstract String buildUpdateSql(String tableName, List fieldNames, Map> realIndexes, List fullField); + public abstract String buildUpdateSql(String schema, String tableName, List fieldNames, Map> realIndexes, List fullField); public abstract String getDriverName(); diff --git 
a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java index f288e50c0..6265bbadd 100644 --- a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/ExtendOutputFormat.java @@ -18,8 +18,9 @@ package com.dtstack.flink.sql.sink.rdb.format; +import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.commons.lang3.StringUtils; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import java.sql.ResultSet; import java.sql.SQLException; @@ -62,40 +63,43 @@ public boolean isReplaceInsertQuery() throws SQLException { */ public void fillRealIndexes() throws SQLException { Map> map = Maps.newHashMap(); - ResultSet rs = getDbConn().getMetaData().getIndexInfo(null, null, getTableName(), true, false); + + ResultSet rs = getDbConn().getMetaData().getIndexInfo(null, getSchema(), DtStringUtil.addQuoteForStr(getTableName()), true, false); while (rs.next()) { String indexName = rs.getString("INDEX_NAME"); - if (!map.containsKey(indexName)) { + if (StringUtils.isNotBlank(indexName) && !map.containsKey(indexName)) { map.put(indexName, new ArrayList<>()); } String column_name = rs.getString("COLUMN_NAME"); if (StringUtils.isNotBlank(column_name)) { - column_name = column_name.toUpperCase(); + map.get(indexName).add(column_name); } - map.get(indexName).add(column_name); } for (Map.Entry> entry : map.entrySet()) { String k = entry.getKey(); List v = entry.getValue(); if (v != null && v.size() != 0 && v.get(0) != null) { - getRealIndexes().put(k, v); + realIndexesAdd(k, v); } } } + + /** * get db all column name * * @throws SQLException */ public void fillFullColumns() throws SQLException { - ResultSet rs = getDbConn().getMetaData().getColumns(null, null, getTableName(), null); + // table name 
not quote + ResultSet rs = getDbConn().getMetaData().getColumns(null, getSchema(), getTableName(), null); while (rs.next()) { String columnName = rs.getString("COLUMN_NAME"); if (StringUtils.isNotBlank(columnName)) { - getFullField().add(columnName.toUpperCase()); + fullFieldAdd(columnName); } } } @@ -108,4 +112,5 @@ public boolean containsIgnoreCase(List l, String s) { } return false; } + } diff --git a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java index 40cb712a9..7653c847c 100644 --- a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/format/RetractJDBCOutputFormat.java @@ -23,8 +23,8 @@ import org.apache.commons.lang3.StringUtils; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.configuration.Configuration; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.types.Row; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -64,6 +64,7 @@ public class RetractJDBCOutputFormat extends MetricOutputFormat { private String dbURL; private String tableName; private String dbType; + private String schema; private RdbSink dbSink; // trigger preparedStatement execute batch interval private long batchWaitInterval = 10000l; @@ -106,9 +107,9 @@ public void open(int taskNumber, int numTasks) throws IOException { dbConn = establishConnection(); initMetric(); - if (dbConn.getMetaData().getTables(null, null, tableName, null).next()) { + if (existTabname()) { if (isReplaceInsertQuery()) { - insertQuery = dbSink.buildUpdateSql(tableName, Arrays.asList(dbSink.getFieldNames()), realIndexes, fullField); + 
insertQuery = dbSink.buildUpdateSql(schema , tableName, Arrays.asList(dbSink.getFieldNames()), realIndexes, fullField); } upload = dbConn.prepareStatement(insertQuery); } else { @@ -209,7 +210,7 @@ private void writeSingleRecord(Row row) { dbConn.commit(); } catch (SQLException e) { outDirtyRecords.inc(); - if (outDirtyRecords.getCount() % dirtyDataPrintFrequency == 0) { + if (outDirtyRecords.getCount() % dirtyDataPrintFrequency == 0 || LOG.isDebugEnabled()) { LOG.error("record insert failed ..", row.toString()); LOG.error("", e); } @@ -400,6 +401,21 @@ public void setUsername(String username) { this.username = username; } + public String getSchema() { + if (StringUtils.isNotEmpty(schema)) { + return schema; + } + return null; + } + + public void setSchema(String schema) { + this.schema = schema; + } + + public boolean existTabname() throws SQLException { + return dbConn.getMetaData().getTables(null, getSchema(), tableName, null).next(); + } + public void setPassword(String password) { this.password = password; } @@ -452,7 +468,11 @@ public String getTableName() { return tableName; } - public Map> getRealIndexes() { + public void realIndexesAdd(String index, List fieldes) { + this.realIndexes.put(index, fieldes); + } + + public Map> getRealIndexes() { return realIndexes; } @@ -464,4 +484,8 @@ public void setBatchWaitInterval(long batchWaitInterval) { public List getFullField() { return fullField; } + + public void fullFieldAdd(String colName) { + this.fullField.add(colName); + } } diff --git a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java index f135dc2f3..beb51ffaa 100644 --- a/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java +++ b/rdb/rdb-sink/src/main/java/com/dtstack/flink/sql/sink/rdb/table/RdbSinkParser.java @@ -46,6 +46,8 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map out) throws 
Exception { out.collect(null); } String columnName = sideInfo.getEqualFieldList().get(conValIndex); - inputParams.put(columnName, (String) equalObj); + inputParams.put(columnName, equalObj.toString()); } String key = buildKey(inputParams); diff --git a/redis5/redis5-side/redis-all-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAllSideInfo.java b/redis5/redis5-side/redis-all-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAllSideInfo.java index b449d8c02..81e7b1f4f 100644 --- a/redis5/redis5-side/redis-all-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAllSideInfo.java +++ b/redis5/redis5-side/redis-all-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAllSideInfo.java @@ -23,11 +23,9 @@ import com.dtstack.flink.sql.side.SideInfo; import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java b/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java index 4da17eb22..e2b256421 100644 --- a/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java +++ b/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncReqRow.java @@ -32,14 +32,12 @@ import io.lettuce.core.cluster.RedisClusterClient; import io.lettuce.core.cluster.api.StatefulRedisClusterConnection; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; -import 
org.apache.flink.calcite.shaded.com.google.common.collect.Maps; +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.functions.async.ResultFuture; -import org.apache.flink.table.typeutils.TimeIndicatorTypeInfo; import org.apache.flink.types.Row; -import java.sql.Timestamp; import java.util.Collections; import java.util.List; import java.util.Map; @@ -124,12 +122,12 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except Integer conValIndex = sideInfo.getEqualValIndex().get(i); Object equalObj = input.getField(conValIndex); if(equalObj == null){ - resultFuture.complete(null); + dealMissKey(input, resultFuture); return; } - + String value = equalObj.toString(); keyData.add(sideInfo.getEqualFieldList().get(i)); - keyData.add((String) equalObj); + keyData.add(value); } String key = buildCacheKey(keyData); @@ -159,29 +157,33 @@ public void asyncInvoke(Row input, ResultFuture resultFuture) throws Except Map keyValue = Maps.newHashMap(); List value = async.keys(key + ":*").get(); String[] values = value.toArray(new String[value.size()]); - RedisFuture>> future = ((RedisStringAsyncCommands) async).mget(values); - future.thenAccept(new Consumer>>() { - @Override - public void accept(List> keyValues) { - if (keyValues.size() != 0){ - for (int i=0; i>> future = ((RedisStringAsyncCommands) async).mget(values); + future.thenAccept(new Consumer>>() { + @Override + public void accept(List> keyValues) { + if (keyValues.size() != 0) { + for (int i = 0; i < keyValues.size(); i++) { + String[] splitKeys = keyValues.get(i).getKey().split(":"); + keyValue.put(splitKeys[1], splitKeys[2]); + keyValue.put(splitKeys[3], keyValues.get(i).getValue()); + } + Row row = fillData(input, keyValue); + resultFuture.complete(Collections.singleton(row)); + if (openCache()) { + putCache(key, CacheObj.buildCacheObj(ECacheContentType.MultiLine, keyValue)); + } + } 
else { + dealMissKey(input, resultFuture); + if (openCache()) { + putCache(key, CacheMissVal.getMissKeyObj()); + } } } - } - }); + }); + } } private String buildCacheKey(List keyData) { diff --git a/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncSideInfo.java b/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncSideInfo.java index 2c85aaf3a..9bdefe8c5 100644 --- a/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncSideInfo.java +++ b/redis5/redis5-side/redis-async-side/src/main/java/com/dtstack/flink/sql/side/redis/RedisAsyncSideInfo.java @@ -23,11 +23,9 @@ import com.dtstack.flink.sql.side.SideInfo; import com.dtstack.flink.sql.side.SideTableInfo; import com.dtstack.flink.sql.util.ParseUtils; -import org.apache.calcite.sql.SqlBasicCall; -import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlNode; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.calcite.shaded.com.google.common.collect.Lists; +import com.google.common.collect.Lists; import java.util.List; diff --git a/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java b/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java index 40dae6c68..5007868e9 100644 --- a/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java +++ b/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideReqRow.java @@ -24,6 +24,8 @@ import org.apache.flink.types.Row; import java.io.Serializable; +import java.math.BigDecimal; +import java.sql.Date; import java.sql.Timestamp; import java.util.Map; @@ -63,10 +65,50 @@ public Row fillData(Row input, Object sideInput) { row.setField(entry.getKey(), null); }else{ String key = 
sideInfo.getSideFieldNameIndex().get(entry.getKey()); - row.setField(entry.getKey(), sideInputMap.get(key)); + setRowField(row, entry.getKey(), sideInfo, sideInputMap.get(key)); } } return row; } + + public void setRowField(Row row, Integer index, SideInfo sideInfo, String value) { + Integer keyIndex = sideInfo.getSideFieldIndex().get(index); + String classType = sideInfo.getSideTableInfo().getFieldClassList().get(keyIndex).getName(); + switch (classType){ + case "java.lang.Integer": + row.setField(index, Integer.valueOf(value)); + break; + case "java.lang.String": + row.setField(index, value); + break; + case "java.lang.Double": + row.setField(index, Double.valueOf(value)); + break; + case "java.lang.Long": + row.setField(index, Long.valueOf(value)); + break; + case "java.lang.Byte": + row.setField(index, Byte.valueOf(value)); + break; + case "java.lang.Short": + row.setField(index, Short.valueOf(value)); + break; + case "java.lang.Float": + row.setField(index, Float.valueOf(value)); + break; + case "java.math.BigDecimal": + row.setField(index, BigDecimal.valueOf(Long.valueOf(value))); + break; + case "java.sql.Timestamp": + row.setField(index, Timestamp.valueOf(value)); + break; + case "java.sql.Date": + row.setField(index, Date.valueOf(value)); + break; + default: + throw new RuntimeException("no support field type. 
the type: " + classType); + } + } + } diff --git a/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideTableInfo.java b/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideTableInfo.java index c463febec..88cfcb8a2 100644 --- a/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideTableInfo.java +++ b/redis5/redis5-side/redis-side-core/src/main/java/com/dtstack/flink/sql/side/redis/table/RedisSideTableInfo.java @@ -19,7 +19,7 @@ package com.dtstack.flink.sql.side.redis.table; import com.dtstack.flink.sql.side.SideTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; public class RedisSideTableInfo extends SideTableInfo { diff --git a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java index 9077c989e..bb3963edb 100644 --- a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java +++ b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/RedisOutputFormat.java @@ -171,7 +171,13 @@ public void writeRecord(Tuple2 record) throws IOException { for (int i = 0; i < fieldNames.length; i++) { StringBuilder key = new StringBuilder(); key.append(tableName).append(":").append(perKey).append(":").append(fieldNames[i]); - jedis.set(key.toString(), row.getField(i).toString()); + + String value = "null"; + Object field = row.getField(i); + if (field != null) { + value = field.toString(); + } + jedis.set(key.toString(), value); } if (outRecords.getCount()%rowLenth == 0){ diff --git a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java index c114b946e..2fcd20dd8 100644 
--- a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java +++ b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisSinkParser.java @@ -21,7 +21,10 @@ import com.dtstack.flink.sql.table.AbsTableParser; import com.dtstack.flink.sql.table.TableInfo; import com.dtstack.flink.sql.util.MathUtil; +import org.apache.commons.lang3.StringUtils; +import java.util.ArrayList; +import java.util.Arrays; import java.util.Map; public class RedisSinkParser extends AbsTableParser { @@ -42,6 +45,17 @@ public TableInfo getTableInfo(String tableName, String fieldsInfo, Map primaryKeysList = null; + if (!StringUtils.isEmpty(primaryKeysStr)) { + String[] primaryKeysArray = primaryKeysStr.split(","); + primaryKeysList = new ArrayList(Arrays.asList(primaryKeysArray)); + } else { + primaryKeysList = new ArrayList<>(); + } + redisTableInfo.setPrimaryKeys(primaryKeysList); + return redisTableInfo; } } diff --git a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisTableInfo.java b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisTableInfo.java index 7afb51cdd..82def4115 100644 --- a/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisTableInfo.java +++ b/redis5/redis5-sink/src/main/java/com/dtstack/flink/sql/sink/redis/table/RedisTableInfo.java @@ -19,7 +19,7 @@ package com.dtstack.flink.sql.sink.redis.table; import com.dtstack.flink.sql.table.TargetTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; public class RedisTableInfo extends TargetTableInfo { diff --git a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java index ce9a44895..b76e94159 100644 --- 
a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java +++ b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/CustomerSocketTextStreamFunction.java @@ -20,8 +20,8 @@ import com.dtstack.flink.sql.source.serversocket.table.ServersocketSourceTableInfo; import org.apache.flink.api.common.typeinfo.TypeInformation; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode; -import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; import org.apache.flink.streaming.api.functions.source.SourceFunction; import org.apache.flink.types.Row; import org.apache.flink.util.IOUtils; diff --git a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java index 2ff9a6739..796728eb3 100644 --- a/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java +++ b/serversocket/serversocket-source/src/main/java/com/dtstack/flink/sql/source/serversocket/table/ServersocketSourceTableInfo.java @@ -18,7 +18,7 @@ package com.dtstack.flink.sql.source.serversocket.table; import com.dtstack.flink.sql.table.SourceTableInfo; -import org.apache.flink.calcite.shaded.com.google.common.base.Preconditions; +import com.google.common.base.Preconditions; /** * Reason: diff --git a/sqlserver/sqlserver-side/sqlserver-all-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAllReqRow.java b/sqlserver/sqlserver-side/sqlserver-all-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAllReqRow.java index 
837236b61..961539fde 100644 --- a/sqlserver/sqlserver-side/sqlserver-all-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAllReqRow.java +++ b/sqlserver/sqlserver-side/sqlserver-all-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAllReqRow.java @@ -24,10 +24,9 @@ import com.dtstack.flink.sql.side.rdb.all.RdbAllReqRow; import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.flink.api.java.typeutils.RowTypeInfo; -import org.apache.flink.shaded.guava18.com.google.common.collect.Maps; +import com.google.common.collect.Maps; import org.slf4j.Logger; import org.slf4j.LoggerFactory; - import java.sql.Connection; import java.sql.DriverManager; import java.util.List; diff --git a/sqlserver/sqlserver-side/sqlserver-async-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAsyncReqRow.java b/sqlserver/sqlserver-side/sqlserver-async-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAsyncReqRow.java index 41c1dbd4a..cb4240ca6 100644 --- a/sqlserver/sqlserver-side/sqlserver-async-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAsyncReqRow.java +++ b/sqlserver/sqlserver-side/sqlserver-async-side/src/main/java/com/dtstack/flink/sql/side/sqlserver/SqlserverAsyncReqRow.java @@ -59,9 +59,13 @@ public void open(Configuration parameters) throws Exception { .put("idle_connection_test_period", DEFAULT_IDLE_CONNECTION_TEST_PEROID) .put("test_connection_on_checkin", DEFAULT_TEST_CONNECTION_ON_CHECKIN); + + System.setProperty("vertx.disableFileCPResolving", "true"); + VertxOptions vo = new VertxOptions(); vo.setEventLoopPoolSize(DEFAULT_VERTX_EVENT_LOOP_POOL_SIZE); vo.setWorkerPoolSize(DEFAULT_VERTX_WORKER_POOL_SIZE); + vo.setFileResolverCachingEnabled(false); Vertx vertx = Vertx.vertx(vo); setRdbSQLClient(JDBCClient.createNonShared(vertx, sqlserverClientConfig)); } diff --git a/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java 
b/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java index 904cb0895..2341bca66 100644 --- a/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java +++ b/sqlserver/sqlserver-sink/src/main/java/com/dtstack/flink/sql/sink/sqlserver/SqlserverSink.java @@ -21,6 +21,7 @@ import com.dtstack.flink.sql.sink.rdb.RdbSink; import com.dtstack.flink.sql.sink.rdb.format.ExtendOutputFormat; import com.dtstack.flink.sql.sink.rdb.format.RetractJDBCOutputFormat; +import com.dtstack.flink.sql.util.DtStringUtil; import org.apache.commons.lang3.StringUtils; import java.util.*; @@ -46,7 +47,7 @@ public RetractJDBCOutputFormat getOutputFormat() { } @Override - public void buildSql(String tableName, List fields) { + public void buildSql(String scheam, String tableName, List fields) { buildInsertSql(tableName, fields); } @@ -64,13 +65,26 @@ private void buildInsertSql(String tableName, List fields) { } @Override - public String buildUpdateSql(String tableName, List fieldNames, Map> realIndexes, List fullField) { - return "MERGE INTO " + tableName + " T1 USING " + public String buildUpdateSql(String scheam, String tableName, List fieldNames, Map> realIndexes, List fullField) { + StringBuilder sb = new StringBuilder(); + + sb.append("MERGE INTO " + tableName + " T1 USING " + "(" + makeValues(fieldNames) + ") T2 ON (" - + updateKeySql(realIndexes) + ") WHEN MATCHED THEN UPDATE SET " - + getUpdateSql(fieldNames, fullField, "T1", "T2", keyColList(realIndexes)) + " WHEN NOT MATCHED THEN " + + updateKeySql(realIndexes) + ") "); + + + String updateSql = getUpdateSql(fieldNames, fullField, "T1", "T2", keyColList(realIndexes)); + + if (StringUtils.isNotEmpty(updateSql)) { + sb.append(" WHEN MATCHED THEN UPDATE SET "); + sb.append(updateSql); + } + + sb.append(" WHEN NOT MATCHED THEN " + "INSERT (" + quoteColumns(fieldNames) + ") VALUES (" - + quoteColumns(fieldNames, "T2") + ");"; + + quoteColumns(fieldNames, "T2") + 
")"); + + return sb.toString(); }