摄取规范
srcIP
srcPort
dstIP
dstPort
protocol
packets
bytes
cost
{"ts":"2018-01-01T01:01:35Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":2000, "dstPort":3000, "protocol": 6, "packets":10, "bytes":1000, "cost": 1.4}
{"ts":"2018-01-01T01:01:51Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":2000, "dstPort":3000, "protocol": 6, "packets":20, "bytes":2000, "cost": 3.1}
{"ts":"2018-01-01T01:01:59Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":2000, "dstPort":3000, "protocol": 6, "packets":30, "bytes":3000, "cost": 0.4}
{"ts":"2018-01-01T01:02:14Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":5000, "dstPort":7000, "protocol": 6, "packets":40, "bytes":4000, "cost": 7.9}
{"ts":"2018-01-01T01:02:29Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":5000, "dstPort":7000, "protocol": 6, "packets":50, "bytes":5000, "cost": 10.2}
{"ts":"2018-01-01T01:03:29Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":5000, "dstPort":7000, "protocol": 6, "packets":60, "bytes":6000, "cost": 4.3}
{"ts":"2018-01-01T02:33:14Z","srcIP":"7.7.7.7", "dstIP":"8.8.8.8", "srcPort":4000, "dstPort":5000, "protocol": 17, "packets":100, "bytes":10000, "cost": 22.4}
{"ts":"2018-01-01T02:33:45Z","srcIP":"7.7.7.7", "dstIP":"8.8.8.8", "srcPort":4000, "dstPort":5000, "protocol": 17, "packets":200, "bytes":20000, "cost": 34.5}
{"ts":"2018-01-01T02:35:45Z","srcIP":"7.7.7.7", "dstIP":"8.8.8.8", "srcPort":4000, "dstPort":5000, "protocol": 17, "packets":300, "bytes":30000, "cost": 46.3}
quickstart/
insertion-tutorial-data.json
dataSchema
列
quickstart/
insertion-tutorial-index.json
"dataSchema" : {}
dataSource
dataSchema
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
}
ingestion-tutorial
dataSchema
timestampSpec
dataSchema
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
}
}
dataSchema
granularitySpec
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"granularitySpec" : {
"rollup" : true
}
}
选择dimension和Metrics
Dimensions
dataSchema
dimensionSpec
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"granularitySpec" : {
"rollup" : true
}
}
名称
类型
类型
srcIP
protocol
Strings vs. Numerics 数字输入应该作为数字维度还是字符串维度接收?
Metrics
dataSchema
metricsSpec
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "packets", "fieldName" : "packets" },
{ "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" },
{ "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" }
],
"granularitySpec" : {
"rollup" : true
}
}
数据包
字节
cost
metricsSpec
dimensionSpec
parseSpec
dataSchema
parser
count
dimensionsSpec
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" },
{ "name" : "packets", "type" : "long" },
{ "name" : "bytes", "type" : "long" },
{ "name" : "srcPort", "type" : "double" }
]
}
dataSchema
parser
metricsSpec
granularitySpec
granularitySpec
uniform
arbitrary
uniform
DAY
WEEK
段粒度
granularitySpec
SegmentGranularity
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "packets", "fieldName" : "packets" },
{ "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" },
{ "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" }
],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "HOUR",
"rollup" : true
}
}
查询粒度
granularitySpec
queryGranularity
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "packets", "fieldName" : "packets" },
{ "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" },
{ "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" }
],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "HOUR",
"queryGranularity" : "MINUTE",
"rollup" : true
}
}
{"ts":"2018-01-01T01:03:29Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":5000, "dstPort":7000, "protocol": 6, "packets":60, "bytes":6000, "cost": 4.3}
{"ts":"2018-01-01T01:03:00Z","srcIP":"1.1.1.1", "dstIP":"2.2.2.2", "srcPort":5000, "dstPort":7000, "protocol": 6, "packets":60, "bytes":6000, "cost": 4.3}
定义时间间隔(batch only)
granularitySpec
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "packets", "fieldName" : "packets" },
{ "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" },
{ "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" }
],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "HOUR",
"queryGranularity" : "MINUTE",
"intervals" : ["2018-01-01/2018-01-02"],
"rollup" : true
}
}
dataSchema
{
"type" : "index_parallel",
"spec" : {
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "packets", "fieldName" : "packets" },
{ "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" },
{ "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" }
],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "HOUR",
"queryGranularity" : "MINUTE",
"intervals" : ["2018-01-01/2018-01-02"],
"rollup" : true
}
}
}
}
ioConfig
ioConfig
inputSource
示例网络流数据
"ioConfig" : {
"type" : "index_parallel",
"inputSource" : {
"type" : "local",
"baseDir" : "quickstart/",
"filter" : "ingestion-tutorial-data.json"
}
}
inputFormat
"ioConfig" : {
"type" : "index_parallel",
"inputSource" : {
"type" : "local",
"baseDir" : "quickstart/",
"filter" : "ingestion-tutorial-data.json"
},
"inputFormat" : {
"type" : "json"
}
}
{
"type" : "index_parallel",
"spec" : {
"dataSchema" : {
"dataSource" : "ingestion-tutorial",
"timestampSpec" : {
"format" : "iso",
"column" : "ts"
},
"dimensionsSpec" : {
"dimensions": [
"srcIP",
{ "name" : "srcPort", "type" : "long" },
{ "name" : "dstIP", "type" : "string" },
{ "name" : "dstPort", "type" : "long" },
{ "name" : "protocol", "type" : "string" }
]
},
"metricsSpec" : [
{ "type" : "count", "name" : "count" },
{ "type" : "longSum", "name" : "packets", "fieldName" : "packets" },
{ "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" },
{ "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" }
],
"granularitySpec" : {
"type" : "uniform",
"segmentGranularity" : "HOUR",
"queryGranularity" : "MINUTE",
"intervals" : ["2018-01-01/2018-01-02"],
"rollup" : true
}
},
"ioConfig" : {
"type" : "index_parallel",
"inputSource" : {
"type" : "local",
"baseDir" : "quickstart/",
"filter" : "ingestion-tutorial-data.json"
},
"inputFormat" : {
"type" : "json"
}
}
}
}
tuningConfig
tuningConfig
"tuningConfig" : {
"type" : "index_parallel",
"maxRowsPerSegment" : 5000000
}
tuningConfig
{ "type" : "index_parallel", "spec" : { "dataSchema" : { "dataSource" : "ingestion-tutorial", "timestampSpec" : { "format" : "iso", "column" : "ts" }, "dimensionsSpec" : { "dimensions": [ "srcIP", { "name" : "srcPort", "type" : "long" }, { "name" : "dstIP", "type" : "string" }, { "name" : "dstPort", "type" : "long" }, { "name" : "protocol", "type" : "string" } ] }, "metricsSpec" : [ { "type" : "count", "name" : "count" }, { "type" : "longSum", "name" : "packets", "fieldName" : "packets" }, { "type" : "longSum", "name" : "bytes", "fieldName" : "bytes" }, { "type" : "doubleSum", "name" : "cost", "fieldName" : "cost" } ], "granularitySpec" : { "type" : "uniform", "segmentGranularity" : "HOUR", "queryGranularity" : "MINUTE", "intervals" : ["2018-01-01/2018-01-02"], "rollup" : true } }, "ioConfig" : { "type" : "index_parallel", "inputSource" : { "type" : "local", "baseDir" : "quickstart/", "filter" : "ingestion-tutorial-data.json" }, "inputFormat" : { "type" : "json" } }, "tuningConfig" : { "type" : "index_parallel", "maxRowsPerSegment" : 5000000 } } }
bin/post-index-task --file quickstart/ingestion-tutorial-index.json --url http://localhost:8081
bin/dsql
select * from "ingestion-tutorial"
$ bin/dsql
Welcome to dsql, the command-line client for Druid SQL.
Type "\h" for help.
dsql> select * from "ingestion-tutorial";
┌──────────────────────────┬───────┬──────┬───────┬─────────┬─────────┬─────────┬──────────┬─────────┬─────────┐
│ __time │ bytes │ cost │ count │ dstIP │ dstPort │ packets │ protocol │ srcIP │ srcPort │
├──────────────────────────┼───────┼──────┼───────┼─────────┼─────────┼─────────┼──────────┼─────────┼─────────┤
│ 2018-01-01T01:01:00.000Z │ 6000 │ 4.9 │ 3 │ 2.2.2.2 │ 3000 │ 60 │ 6 │ 1.1.1.1 │ 2000 │
│ 2018-01-01T01:02:00.000Z │ 9000 │ 18.1 │ 2 │ 2.2.2.2 │ 7000 │ 90 │ 6 │ 1.1.1.1 │ 5000 │
│ 2018-01-01T01:03:00.000Z │ 6000 │ 4.3 │ 1 │ 2.2.2.2 │ 7000 │ 60 │ 6 │ 1.1.1.1 │ 5000 │
│ 2018-01-01T02:33:00.000Z │ 30000 │ 56.9 │ 2 │ 8.8.8.8 │ 5000 │ 300 │ 17 │ 7.7.7.7 │ 4000 │
│ 2018-01-01T02:35:00.000Z │ 30000 │ 46.3 │ 1 │ 8.8.8.8 │ 5000 │ 300 │ 17 │ 7.7.7.7 │ 4000 │
└──────────────────────────┴───────┴──────┴───────┴─────────┴─────────┴─────────┴──────────┴─────────┴─────────┘
Retrieved 5 rows in 0.12s.
dsql>