1. starrocks单机搭建

docker run -p 9030:9030 -p 8030:8030 -p 8040:8040 -itd --name starrocks starrocks/allin1-ubuntu
  1. 下载数据集

wget https://starrocks-public.oss-cn-zhangjiakou.aliyuncs.com/tpch-poc-1.0.zip
unzip tpch-poc-1.0
cd tpch-poc-1.0
  1. 生成数据集

TPC-H 基准模型中定义了一个数据库模型,容量可以在 1GB~10000GB 的 8 个级别中选择。
根据比例因子 Scale Factor(SF)的大小确定数据集的大小,当 SF=1 时,表的大小为 1GB;SF 可取值有(1,10,30,100,300,1000,3000,10000)。

sh bin/gen_data/gen-tpch.sh 1 data_1
  1. 修改tpch-poc-1.0/sql/starrocks.conf,填写starrocks的链接地址与账号密码等;

  1. 在tpch-poc-1.0/sql/tpch下新增ddl_1文件夹,并在文件夹下新增tpch_create.sql文件:

drop database if exists tpch;
create database tpch;
use tpch;

drop table if exists customer;
CREATE TABLE customer ( c_custkey     int NOT NULL,
                        c_name        VARCHAR(25) NOT NULL,
                        c_address     VARCHAR(40) NOT NULL,
                        c_nationkey   int NOT NULL,
                        c_phone       VARCHAR(15) NOT NULL,
                        c_acctbal     decimal(15, 2)   NOT NULL,
                        c_mktsegment  VARCHAR(10) NOT NULL,
                        c_comment     VARCHAR(117) NOT NULL)
ENGINE=OLAP
DUPLICATE KEY(`c_custkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`c_custkey`) BUCKETS 24
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT"
);


drop table if exists lineitem;
CREATE TABLE lineitem ( l_shipdate    DATE NOT NULL,
                             l_orderkey    int NOT NULL,
                             l_linenumber  int not null,
                             l_partkey     int NOT NULL,
                             l_suppkey     int not null,
                             l_quantity    decimal(15, 2) NOT NULL,
                             l_extendedprice  decimal(15, 2) NOT NULL,
                             l_discount    decimal(15, 2) NOT NULL,
                             l_tax         decimal(15, 2) NOT NULL,
                             l_returnflag  VARCHAR(1) NOT NULL,
                             l_linestatus  VARCHAR(1) NOT NULL,
                             l_commitdate  DATE NOT NULL,
                             l_receiptdate DATE NOT NULL,
                             l_shipinstruct VARCHAR(25) NOT NULL,
                             l_shipmode     VARCHAR(10) NOT NULL,
                             l_comment      VARCHAR(44) NOT NULL)
ENGINE=OLAP
DUPLICATE KEY(`l_shipdate`, `l_orderkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT",
    "colocate_with" = "group_tpch_1"
);

drop table if exists nation;
CREATE TABLE `nation` (
  `n_nationkey` int(11) NOT NULL,
  `n_name`      varchar(25) NOT NULL,
  `n_regionkey` int(11) NOT NULL,
  `n_comment`   varchar(152) NULL
) ENGINE=OLAP
DUPLICATE KEY(`N_NATIONKEY`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`N_NATIONKEY`) BUCKETS 1
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT"
);

drop table if exists orders;
CREATE TABLE orders  ( o_orderkey       int NOT NULL,
                       o_orderdate      DATE NOT NULL,
                       o_custkey        int NOT NULL,
                       o_orderstatus    VARCHAR(1) NOT NULL,
                       o_totalprice     decimal(15, 2) NOT NULL,
                       o_orderpriority  VARCHAR(15) NOT NULL,
                       o_clerk          VARCHAR(15) NOT NULL,
                       o_shippriority   int NOT NULL,
                       o_comment        VARCHAR(79) NOT NULL)
ENGINE=OLAP
DUPLICATE KEY(`o_orderkey`, `o_orderdate`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`o_orderkey`) BUCKETS 96
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT",
    "colocate_with" = "group_tpch_1"
);

drop table if exists part;
CREATE TABLE part ( p_partkey          int NOT NULL,
                          p_name        VARCHAR(55) NOT NULL,
                          p_mfgr        VARCHAR(25) NOT NULL,
                          p_brand       VARCHAR(10) NOT NULL,
                          p_type        VARCHAR(25) NOT NULL,
                          p_size        int NOT NULL,
                          p_container   VARCHAR(10) NOT NULL,
                          p_retailprice decimal(15, 2) NOT NULL,
                          p_comment     VARCHAR(23) NOT NULL)
ENGINE=OLAP
DUPLICATE KEY(`p_partkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`p_partkey`) BUCKETS 24
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT",
    "colocate_with" = "group_tpch_1p"
);

drop table if exists partsupp;
CREATE TABLE partsupp ( ps_partkey          int NOT NULL,
                             ps_suppkey     int NOT NULL,
                             ps_availqty    int NOT NULL,
                             ps_supplycost  decimal(15, 2)  NOT NULL,
                             ps_comment     VARCHAR(199) NOT NULL)
ENGINE=OLAP
DUPLICATE KEY(`ps_partkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`ps_partkey`) BUCKETS 24
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT",
    "colocate_with" = "group_tpch_1p"
);

drop table if exists region;
CREATE TABLE region  ( r_regionkey      int NOT NULL,
                            r_name       VARCHAR(25) NOT NULL,
                            r_comment    VARCHAR(152))
ENGINE=OLAP
DUPLICATE KEY(`r_regionkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`r_regionkey`) BUCKETS 1
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT"
);

drop table if exists supplier;
CREATE TABLE supplier (  s_suppkey       int NOT NULL,
                             s_name        VARCHAR(25) NOT NULL,
                             s_address     VARCHAR(40) NOT NULL,
                             s_nationkey   int NOT NULL,
                             s_phone       VARCHAR(15) NOT NULL,
                             s_acctbal     decimal(15, 2) NOT NULL,
                             s_comment     VARCHAR(101) NOT NULL)
ENGINE=OLAP
DUPLICATE KEY(`s_suppkey`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`s_suppkey`) BUCKETS 12
PROPERTIES (
    "replication_num" = "1",
    "in_memory" = "false",
    "storage_format" = "DEFAULT"
);

drop view if exists revenue0;
create view revenue0 (supplier_no, total_revenue) as
        select
                l_suppkey,
                sum(l_extendedprice * (1 - l_discount))
        from
                lineitem
        where
                l_shipdate >= date '1996-01-01'
                and l_shipdate < date '1996-01-01' + interval '3' month
        group by
                l_suppkey;
  1. 执行表创建sh bin/create_db_table.sh ddl_1

  2. 执行数据导入sh bin/stream_load.sh data_1

  3. 执行测试sh bin/benchmark.sh