mirror of https://github.com/grafana/tempo.git
synced 2025-03-14 03:06:42 +00:00

Do less Nexting (#4753)

* this is garbage
* filtery stuff
* fix
* max def everywhere
* clean up benches
* clean up
* remove vendor changes
* changelog
* add details about bench env vars

Signed-off-by: Joe Elliott <number101010@gmail.com>
@@ -34,6 +34,7 @@ configurable via the throughput_bytes_slo field, and it will populate op="traces
 * [ENHANCEMENT] Improve block-builder performance [#4596](https://github.com/grafana/tempo/pull/4596) (@mdisibio)
 * [ENHANCEMENT] Improve block-builder performance by not using WAL stage [#4647](https://github.com/grafana/tempo/pull/4647) [#4671](https://github.com/grafana/tempo/pull/4671) (@mdisibio)
 * [ENHANCEMENT] Export new `tempo_ingest_group_partition_lag` metric from block-builders and metrics-generators [#4571](https://github.com/grafana/tempo/pull/4571) (@mdisibio)
+* [ENHANCEMENT] Overall iterator performance improvement by using max definition level to ignore parts of the RowNumber while nexting. [#4753](https://github.com/grafana/tempo/pull/4753) (@joe-elliott)
 * [ENHANCEMENT] Use distroless base container images for improved security [#4556](https://github.com/grafana/tempo/pull/4556) (@carles-grafana)
 * [ENHANCEMENT] rythm: add block builder to resources dashboard [#4556](https://github.com/grafana/tempo/pull/4669) (@javiermolinar)
 * [ENHANCEMENT] update dskit to latest version [#4681](https://github.com/grafana/tempo/pull/4681) (@javiermolinar)
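The gist of the change, as a minimal runnable sketch (illustrative only — the real types live in `pkg/parquetquery`; the `main` wrapper and output comments here are ours): a `RowNumber` keeps one counter per definition level, but a column whose max definition level is `d` never defines levels above `d`, so `Next` only has to reset counters up to `d` instead of touching all eight slots.

```go
package main

import "fmt"

const MaxDefinitionLevel = 7

// RowNumber keeps one counter per definition level, as in pkg/parquetquery.
type RowNumber [MaxDefinitionLevel + 1]int32

// Next advances the row number for a value with the given repetition and
// definition levels. The new third argument lets it stop early: slots past
// the column's max definition level are never read, so they never need to
// be reset.
func (t *RowNumber) Next(repetitionLevel, definitionLevel, maxDefinitionLevel int) {
	t[repetitionLevel]++
	// New children up through the definition level.
	for i := repetitionLevel + 1; i <= definitionLevel; i++ {
		t[i] = 0
	}
	// Children past the definition level are undefined, but only up to the
	// column's max definition level.
	for i := definitionLevel + 1; i <= maxDefinitionLevel; i++ {
		t[i] = -1
	}
}

func main() {
	rn := RowNumber{-1, -1, -1, -1, -1, -1, -1, -1}
	rn.Next(0, 1, 3) // a value in a column with max definition level 3
	fmt.Println(rn)  // [0 0 -1 -1 -1 -1 -1 -1]: slots 4-7 were never touched
}
```

This replaces the fully unrolled switch that previously handled every (repetition level, definition level) pair, as the large hunk further below shows.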
@@ -240,10 +240,10 @@ type attribute struct {
 }
 
 func aggregateAttributes(pf *parquet.File, keyPath string, valuePaths []string) (genericAttrSummary, error) {
-    keyIdx, _ := pq.GetColumnIndexByPath(pf, keyPath)
+    keyIdx, _, _ := pq.GetColumnIndexByPath(pf, keyPath)
     valueIdxs := make([]int, 0, len(valuePaths))
     for _, v := range valuePaths {
-        idx, _ := pq.GetColumnIndexByPath(pf, v)
+        idx, _, _ := pq.GetColumnIndexByPath(pf, v)
         valueIdxs = append(valueIdxs, idx)
     }
 
@@ -311,7 +311,7 @@ func aggregateDedicatedColumns(pf *parquet.File, scope backend.DedicatedColumnSc
 }
 
 func aggregateColumn(pf *parquet.File, colName string) (uint64, error) {
-    idx, _ := pq.GetColumnIndexByPath(pf, colName)
+    idx, _, _ := pq.GetColumnIndexByPath(pf, colName)
     calc, err := inspect.NewRowStatCalculator(pf, inspect.RowStatOptions{
         Columns: []int{idx},
     })
@@ -38,7 +38,7 @@ func (cmd *listColumnCmd) Run(ctx *globalOptions) error {
         return err
     }
 
-    colIndex, _ := pq.GetColumnIndexByPath(pf, cmd.Column)
+    colIndex, _, _ := pq.GetColumnIndexByPath(pf, cmd.Column)
 
     for i, rg := range pf.RowGroups() {
@@ -126,7 +126,7 @@ func getAllTraceIDs(t *testing.T, dir string, tenant string) []string {
         err := r.Close()
         require.NoError(t, err)
     }()
-    traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, vparquet4.TraceIDColumnName)
+    traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, vparquet4.TraceIDColumnName)
     require.GreaterOrEqual(t, traceIDIndex, 0)
     defer func() {
         err := r.Close()
@@ -128,429 +128,7 @@ func (t *RowNumber) Valid() bool {
 // null | 1    | 1    | { 0, 1, -1, -1 }
 // gb   | 1    | 3    | { 0, 2, 0, 0 }
 // null | 0    | 1    | { 1, 0, -1, -1 }
-func (t *RowNumber) Next(repetitionLevel, definitionLevel int) {
-    t[repetitionLevel]++
-
-    // the following is nextSlow() unrolled
-    switch repetitionLevel {
-    case 0:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[1] = 0
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[1] = 0
-            t[2] = 0
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[1] = 0
-            t[2] = 0
-            t[3] = 0
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[1] = 0
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[1] = 0
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[1] = 0
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = -1
-        case 7:
-            t[1] = 0
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 1:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[2] = 0
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[2] = 0
-            t[3] = 0
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = -1
-        case 7:
-            t[2] = 0
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 2:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[3] = 0
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[3] = 0
-            t[4] = 0
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = -1
-        case 7:
-            t[3] = 0
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 3:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[4] = 0
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[4] = 0
-            t[5] = 0
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = -1
-        case 7:
-            t[4] = 0
-            t[5] = 0
-            t[6] = 0
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 4:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[5] = 0
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[5] = 0
-            t[6] = 0
-            t[7] = -1
-        case 7:
-            t[5] = 0
-            t[6] = 0
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 5:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[6] = 0
-            t[7] = -1
-        case 7:
-            t[6] = 0
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 6:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[7] = -1
-        case 7:
-            t[7] = 0
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    case 7:
-        switch definitionLevel {
-        case 0:
-            t[1] = -1
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 1:
-            t[2] = -1
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 2:
-            t[3] = -1
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 3:
-            t[4] = -1
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 4:
-            t[5] = -1
-            t[6] = -1
-            t[7] = -1
-        case 5:
-            t[6] = -1
-            t[7] = -1
-        case 6:
-            t[7] = -1
-        case 7:
-        default:
-            panicWhenInvalidDefinitionLevel(definitionLevel)
-        }
-    }
-}
-
-// nextSlow is the original implementation of next. it is kept to test against
-// the unrolled version above
-func (t *RowNumber) nextSlow(repetitionLevel, definitionLevel int) {
+func (t *RowNumber) Next(repetitionLevel, definitionLevel, maxDefinitionLevel int) {
     t[repetitionLevel]++
 
     // New children up through the definition level
@@ -559,7 +137,7 @@ func (t *RowNumber) nextSlow(repetitionLevel, definitionLevel int) {
     }
 
     // // Children past the definition level are undefined
-    for i := definitionLevel + 1; i < len(t); i++ {
+    for i := definitionLevel + 1; i < len(t) && i <= maxDefinitionLevel; i++ {
         t[i] = -1
     }
 }
@@ -803,13 +381,15 @@ type SyncIterator struct {
     currPageN int
     at        IteratorResult // Current value pointed at by iterator. Returned by call Next and SeekTo, valid until next call.
 
+    maxDefinitionLevel int
+
     intern   bool
     interner *intern.Interner
 }
 
 var _ Iterator = (*SyncIterator)(nil)
 
-func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, opts ...SyncIteratorOpt) *SyncIterator {
+func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, maxDefinitionLevel int, opts ...SyncIteratorOpt) *SyncIterator {
     // Assign row group bounds.
     // Lower bound is inclusive
     // Upper bound is exclusive, points at the first row of the next group
@@ -841,16 +421,17 @@ func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnN
 
     // Create the iterator
     i := &SyncIterator{
-        span:       span,
-        column:     column,
-        columnName: columnName,
-        rgs:        rgs,
-        readSize:   readSize,
-        rgsMin:     rgsMin,
-        rgsMax:     rgsMax,
-        filter:     filter,
-        curr:       EmptyRowNumber(),
-        at:         at,
+        span:               span,
+        column:             column,
+        columnName:         columnName,
+        rgs:                rgs,
+        readSize:           readSize,
+        rgsMin:             rgsMin,
+        rgsMax:             rgsMax,
+        filter:             filter,
+        curr:               EmptyRowNumber(),
+        at:                 at,
+        maxDefinitionLevel: maxDefinitionLevel,
     }
 
     // Apply options
@@ -1160,7 +741,7 @@ func (c *SyncIterator) next() (RowNumber, *pq.Value, error) {
 
         // Inspect all values to track the current row number,
         // even if the value is filtered out next.
-        c.curr.Next(v.RepetitionLevel(), v.DefinitionLevel())
+        c.curr.Next(v.RepetitionLevel(), v.DefinitionLevel(), c.maxDefinitionLevel)
         c.currBufN++
         c.currPageN++
@@ -1260,11 +841,12 @@ func (c *SyncIterator) Close() {
 // the optional predicate to each chunk, page, and value. Results are read by calling
 // Next() until it returns nil.
 type ColumnIterator struct {
-    rgs      []pq.RowGroup
-    col      int
-    colName  string
-    filter   *InstrumentedPredicate
-    selectAs string
+    rgs                []pq.RowGroup
+    col                int
+    colName            string
+    filter             *InstrumentedPredicate
+    selectAs           string
+    maxDefinitionLevel int
 
     // Row number to seek to, protected by mutex.
     // Less allocs than storing in atomic.Value
@@ -1288,16 +870,17 @@ type columnIteratorBuffer struct {
     values []pq.Value
 }
 
-func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string) *ColumnIterator {
+func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, maxDefinitionLevel int) *ColumnIterator {
     c := &ColumnIterator{
-        rgs:      rgs,
-        col:      column,
-        colName:  columnName,
-        filter:   &InstrumentedPredicate{pred: filter},
-        selectAs: selectAs,
-        quit:     make(chan struct{}),
-        ch:       make(chan *columnIteratorBuffer, 1),
-        currN:    -1,
+        rgs:                rgs,
+        col:                column,
+        colName:            columnName,
+        filter:             &InstrumentedPredicate{Pred: filter},
+        selectAs:           selectAs,
+        quit:               make(chan struct{}),
+        ch:                 make(chan *columnIteratorBuffer, 1),
+        currN:              -1,
+        maxDefinitionLevel: maxDefinitionLevel,
     }
 
     c.iter = func() { c.iterate(ctx, readSize) }
@@ -1417,7 +1000,7 @@ func (c *ColumnIterator) iterate(ctx context.Context, readSize int) {
 
             // We have to do this for all values (even if the
             // value is excluded by the predicate)
-            rn.Next(v.RepetitionLevel(), v.DefinitionLevel())
+            rn.Next(v.RepetitionLevel(), v.DefinitionLevel(), c.maxDefinitionLevel)
 
             if c.filter != nil {
                 if !c.filter.KeepValue(v) {
@@ -2015,7 +1598,6 @@ func (u *UnionIterator) Next() (*IteratorResult, error) {
             if err != nil {
                 return nil, fmt.Errorf("union iterator peek failed: %w", err)
             }
-
             // If this iterator is exhausted go to the next one
             if rn == nil {
                 continue
@@ -3,7 +3,6 @@ package parquetquery
 
 import (
     "context"
-    "math"
     "math/rand"
     "os"
     "strconv"
     "testing"
@@ -19,30 +18,13 @@ var iterTestCases = []struct {
     makeIter makeTestIterFn
 }{
     {"async", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator {
-        return NewColumnIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs)
+        return NewColumnIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs, MaxDefinitionLevel)
     }},
     {"sync", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator {
-        return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs)
+        return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs, MaxDefinitionLevel)
     }},
 }
 
-// TestNext compares the unrolled Next() with the original nextSlow() to
-// prevent drift
-func TestNext(t *testing.T) {
-    rn1 := RowNumber{0, 0, 0, 0, 0, 0, 0, 0}
-    rn2 := RowNumber{0, 0, 0, 0, 0, 0, 0, 0}
-
-    for i := 0; i < 1000; i++ {
-        r := rand.Intn(MaxDefinitionLevel + 1)
-        d := rand.Intn(MaxDefinitionLevel + 1)
-
-        rn1.Next(r, d)
-        rn2.nextSlow(r, d)
-
-        require.Equal(t, rn1, rn2)
-    }
-}
-
 // TestTruncate compares the unrolled TruncateRowNumber() with the original truncateRowNumberSlow() to
 // prevent drift
 func TestTruncateRowNumber(t *testing.T) {
@@ -73,44 +55,26 @@ func TestInvalidDefinitionLevelTruncate(t *testing.T) {
     })
 }
 
-func TestInvalidDefinitionLevelNext(t *testing.T) {
-    t.Run("Next -1", func(t *testing.T) {
-        assertPanic(t, func() {
-            rn := RowNumber{1, 2, 3, 4, 5, 6, 7, 8}
-            r := 0
-            d := -1
-            rn.Next(r, d)
-        })
-    })
-    t.Run("Next Max+1", func(t *testing.T) {
-        assertPanic(t, func() {
-            rn := RowNumber{1, 2, 3, 4, 5, 6, 7, 8}
-            r := 0
-            d := MaxDefinitionLevel + 1
-            rn.Next(r, d)
-        })
-    })
-}
-
-func TestRowNumber(t *testing.T) {
+func TestRowNumberNext(t *testing.T) {
     tr := EmptyRowNumber()
     require.Equal(t, RowNumber{-1, -1, -1, -1, -1, -1, -1, -1}, tr)
 
     steps := []struct {
-        repetitionLevel int
-        definitionLevel int
-        expected        RowNumber
+        repetitionLevel    int
+        definitionLevel    int
+        maxDefinitionLevel int
+        expected           RowNumber
     }{
         // Name.Language.Country examples from the Dremel whitepaper
-        {0, 3, RowNumber{0, 0, 0, 0, -1, -1, -1, -1}},
-        {2, 2, RowNumber{0, 0, 1, -1, -1, -1, -1, -1}},
-        {1, 1, RowNumber{0, 1, -1, -1, -1, -1, -1, -1}},
-        {1, 3, RowNumber{0, 2, 0, 0, -1, -1, -1, -1}},
-        {0, 1, RowNumber{1, 0, -1, -1, -1, -1, -1, -1}},
+        {0, 3, 3, RowNumber{0, 0, 0, 0, -1, -1, -1, -1}},
+        {2, 2, 3, RowNumber{0, 0, 1, -1, -1, -1, -1, -1}},
+        {1, 1, 3, RowNumber{0, 1, -1, -1, -1, -1, -1, -1}},
+        {1, 3, 3, RowNumber{0, 2, 0, 0, -1, -1, -1, -1}},
+        {0, 1, 3, RowNumber{1, 0, -1, -1, -1, -1, -1, -1}},
     }
 
     for _, step := range steps {
-        tr.Next(step.repetitionLevel, step.definitionLevel)
+        tr.Next(step.repetitionLevel, step.definitionLevel, step.maxDefinitionLevel)
         require.Equal(t, step.expected, tr)
     }
 }
@@ -158,7 +122,7 @@ func testColumnIterator(t *testing.T, makeIter makeTestIterFn) {
     count := 100_000
     pf := createTestFile(t, count)
 
-    idx, _ := GetColumnIndexByPath(pf, "A")
+    idx, _, _ := GetColumnIndexByPath(pf, "A")
     iter := makeIter(pf, idx, nil, "A")
     defer iter.Close()
 
@@ -187,7 +151,7 @@ func testColumnIteratorSeek(t *testing.T, makeIter makeTestIterFn) {
     count := 10_000
     pf := createTestFile(t, count)
 
-    idx, _ := GetColumnIndexByPath(pf, "A")
+    idx, _, _ := GetColumnIndexByPath(pf, "A")
     iter := makeIter(pf, idx, nil, "A")
     defer iter.Close()
 
@@ -224,7 +188,7 @@ func testColumnIteratorPredicate(t *testing.T, makeIter makeTestIterFn) {
 
     pred := NewIntBetweenPredicate(7001, 7003)
 
-    idx, _ := GetColumnIndexByPath(pf, "A")
+    idx, _, _ := GetColumnIndexByPath(pf, "A")
     iter := makeIter(pf, idx, pred, "A")
     defer iter.Close()
 
@@ -253,7 +217,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
     }
 
     pf := createFileWith(t, rows)
-    idx, _ := GetColumnIndexByPath(pf, "A")
+    idx, _, _ := GetColumnIndexByPath(pf, "A")
     readSize := 1000
 
     readIter := func(iter Iterator) (int, error) {
@@ -275,7 +239,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
         // Cancel before iterating
         ctx, cancel := context.WithCancel(context.TODO())
         cancel()
-        iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A")
+        iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
         count, err := readIter(iter)
         require.ErrorContains(t, err, "context canceled")
         require.Equal(t, 0, count)
@@ -283,7 +247,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
 
     t.Run("cancelledPartial", func(t *testing.T) {
         ctx, cancel := context.WithCancel(context.TODO())
-        iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A")
+        iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
 
         // Read some results
         _, err := iter.Next()
@@ -299,7 +263,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
 
     t.Run("closedEarly", func(t *testing.T) {
         // Close before iterating
-        iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A")
+        iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
         iter.Close()
         count, err := readIter(iter)
         require.NoError(t, err)
@@ -307,7 +271,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
     })
 
     t.Run("closedPartial", func(t *testing.T) {
-        iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A")
+        iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
 
         // Read some results
         _, err := iter.Next()
@@ -335,7 +299,7 @@ func benchmarkColumnIterator(b *testing.B, makeIter makeTestIterFn) {
     count := 100_000
     pf := createTestFile(b, count)
 
-    idx, _ := GetColumnIndexByPath(pf, "A")
+    idx, _, _ := GetColumnIndexByPath(pf, "A")
 
     b.ResetTimer()
@@ -237,9 +237,9 @@ func testPredicate(t *testing.T, tc predicateTestCase) {
     r, err := parquet.OpenFile(file, int64(buf.Len()))
     require.NoError(t, err)
 
-    p := InstrumentedPredicate{pred: tc.predicate}
+    p := InstrumentedPredicate{Pred: tc.predicate}
 
-    i := NewColumnIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "")
+    i := NewColumnIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "", MaxDefinitionLevel)
     for {
         res, err := i.Next()
         require.NoError(t, err)
@@ -372,7 +372,7 @@ func (p *OrPredicate) KeepValue(v pq.Value) bool {
 }
 
 type InstrumentedPredicate struct {
-    pred                  Predicate // Optional, if missing then just keeps metrics with no filtering
+    Pred                  Predicate // Optional, if missing then just keeps metrics with no filtering
     InspectedColumnChunks int64
     InspectedPages        int64
     InspectedValues       int64
@@ -384,16 +384,16 @@ type InstrumentedPredicate struct {
 var _ Predicate = (*InstrumentedPredicate)(nil)
 
 func (p *InstrumentedPredicate) String() string {
-    if p.pred == nil {
+    if p.Pred == nil {
         return fmt.Sprintf("InstrumentedPredicate{%d, nil}", p.InspectedValues)
     }
-    return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.pred)
+    return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.Pred)
 }
 
 func (p *InstrumentedPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool {
     p.InspectedColumnChunks++
 
-    if p.pred == nil || p.pred.KeepColumnChunk(c) {
+    if p.Pred == nil || p.Pred.KeepColumnChunk(c) {
         p.KeptColumnChunks++
         return true
     }
@@ -404,7 +404,7 @@ func (p *InstrumentedPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool {
 func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
     p.InspectedPages++
 
-    if p.pred == nil || p.pred.KeepPage(page) {
+    if p.Pred == nil || p.Pred.KeepPage(page) {
         p.KeptPages++
         return true
     }
@@ -415,7 +415,7 @@ func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
 func (p *InstrumentedPredicate) KeepValue(v pq.Value) bool {
     p.InspectedValues++
 
-    if p.pred == nil || p.pred.KeepValue(v) {
+    if p.Pred == nil || p.Pred.KeepValue(v) {
         p.KeptValues++
         return true
     }
@@ -6,23 +6,23 @@ import (
     pq "github.com/parquet-go/parquet-go"
 )
 
-func GetColumnIndexByPath(pf *pq.File, s string) (index, depth int) {
+func GetColumnIndexByPath(pf *pq.File, s string) (index, depth, maxDef int) {
     colSelector := strings.Split(s, ".")
     n := pf.Root()
     for len(colSelector) > 0 {
         n = n.Column(colSelector[0])
         if n == nil {
-            return -1, -1
+            return -1, -1, -1
         }
 
         colSelector = colSelector[1:]
         depth++
     }
 
-    return n.Index(), depth
+    return n.Index(), depth, n.MaxDefinitionLevel()
 }
 
 func HasColumn(pf *pq.File, s string) bool {
-    index, _ := GetColumnIndexByPath(pf, s)
+    index, _, _ := GetColumnIndexByPath(pf, s)
     return index >= 0
 }
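For callers, the third return value flows straight into the iterator constructors. A hedged sketch (the column path and select-as name are illustrative; `pq` here aliases `github.com/grafana/tempo/pkg/parquetquery`, and the wrapper function is ours, not code from this commit):

```go
package example

import (
	"context"
	"fmt"

	pq "github.com/grafana/tempo/pkg/parquetquery"
	parquet "github.com/parquet-go/parquet-go"
)

// traceIDIterator resolves a column and hands its max definition level to the
// iterator, so RowNumber.Next can skip levels the column can never reach.
func traceIDIterator(ctx context.Context, pf *parquet.File) (pq.Iterator, error) {
	// Returns column index, nesting depth, and max definition level.
	index, _, maxDef := pq.GetColumnIndexByPath(pf, "TraceID")
	if index == -1 {
		return nil, fmt.Errorf("column not found")
	}
	return pq.NewSyncIterator(ctx, pf.RowGroups(), index, "TraceID", 1000, nil, "id", maxDef), nil
}
```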
@@ -139,7 +139,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
 
 func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.TraceByIDResponse, error) {
     // traceID column index
-    colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
+    colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
     if colIndex == -1 {
         return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
     }
@@ -227,7 +227,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
     }
 
     // Now iterate the matching row group
-    iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
+    iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
     defer iter.Close()
 
     res, err := iter.Next()
@@ -34,7 +34,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
         return nil, err
     }
 
-    traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
+    traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
     if traceIDIndex < 0 {
         return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
     }
@@ -356,14 +356,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
     async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
 
     return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
-        index, _ := pq.GetColumnIndexByPath(pf, name)
+        index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
         if index == -1 {
             // TODO - don't panic, error instead
             panic("column not found in parquet file:" + name)
         }
 
         if async {
-            return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
+            return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
         }
 
         var opts []pq.SyncIteratorOpt
@@ -371,7 +371,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
             opts = append(opts, pq.SyncIteratorOptIntern())
         }
 
-        return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
+        return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
     }
 }
@@ -69,14 +69,14 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
     specialAttrIdxs := map[int]string{}
 
     // standard resource attributes
-    resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
+    resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
     if resourceKeyIdx == -1 {
         return fmt.Errorf("resource attributes col not found (%d)", resourceKeyIdx)
     }
 
     // special resource attributes
     for lbl, col := range specialMappings {
-        idx, _ := pq.GetColumnIndexByPath(pf, col)
+        idx, _, _ := pq.GetColumnIndexByPath(pf, col)
         if idx == -1 {
             continue
         }
@@ -244,7 +244,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
 
     pf := file.parquetFile
 
-    idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
+    idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
     r := parquet.NewReader(pf)
     return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
 }
@@ -99,7 +99,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d
     // - use rep/def levels to determine if a value exists at a row w/o actually testing values.
     //   atm i believe this requires reading the pages themselves b/c the rep/def lvls come w/ the page
     hasValues := func(path string, pf *parquet.File) bool {
-        idx, _ := parquetquery.GetColumnIndexByPath(pf, path)
+        idx, _, _ := parquetquery.GetColumnIndexByPath(pf, path)
         md := pf.Metadata()
         for _, rg := range md.RowGroups {
             col := rg.Columns[idx]
@@ -139,7 +139,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
 
 func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.TraceByIDResponse, error) {
     // traceID column index
-    colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
+    colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
     if colIndex == -1 {
         return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
     }
@@ -230,7 +230,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
     }
 
     // Now iterate the matching row group
-    iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
+    iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
     defer iter.Close()
 
     res, err := iter.Next()
@@ -41,7 +41,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
         return nil, err
     }
 
-    traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
+    traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
     if traceIDIndex < 0 {
         return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
     }
@@ -354,14 +354,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
     async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
 
     return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
-        index, _ := pq.GetColumnIndexByPath(pf, name)
+        index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
         if index == -1 {
             // TODO - don't panic, error instead
             panic("column not found in parquet file:" + name)
         }
 
        if async {
-            return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
+            return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
         }
 
         var opts []pq.SyncIteratorOpt
@@ -369,7 +369,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
             opts = append(opts, pq.SyncIteratorOptIntern())
         }
 
-        return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
+        return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
     }
 }
@@ -70,11 +70,11 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
     specialAttrIdxs := map[int]string{}
 
     // standard attributes
-    resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
+    resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
 
     // special attributes
     for lbl, col := range specialMappings {
-        idx, _ := pq.GetColumnIndexByPath(pf, col)
+        idx, _, _ := pq.GetColumnIndexByPath(pf, col)
         if idx == -1 {
             continue
         }
@@ -84,7 +84,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
 
     // dedicated attributes
     columnMapping.forEach(func(lbl string, c dedicatedColumn) {
-        idx, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
+        idx, _, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
         if idx == -1 {
             return
         }
@@ -249,7 +249,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
 
     pf := file.parquetFile
 
-    idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
+    idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
     r := parquet.NewReader(pf)
     return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
 }
@@ -99,7 +99,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d
     // - use rep/def levels to determine if a value exists at a row w/o actually testing values.
     //   atm i believe this requires reading the pages themselves b/c the rep/def lvls come w/ the page
     hasValues := func(path string, pf *parquet.File) bool {
-        idx, _ := parquetquery.GetColumnIndexByPath(pf, path)
+        idx, _, _ := parquetquery.GetColumnIndexByPath(pf, path)
         md := pf.Metadata()
         for _, rg := range md.RowGroups {
             col := rg.Columns[idx]
@@ -3,17 +3,13 @@ package vparquet4
 
 import (
     "context"
     "fmt"
-    "path"
     "sort"
     "testing"
 
-    "github.com/google/uuid"
     "github.com/grafana/tempo/pkg/collector"
     "github.com/grafana/tempo/pkg/tempopb"
     "github.com/grafana/tempo/pkg/traceql"
     "github.com/grafana/tempo/pkg/util/test"
     "github.com/grafana/tempo/tempodb/backend"
-    "github.com/grafana/tempo/tempodb/backend/local"
     "github.com/grafana/tempo/tempodb/encoding/common"
     "github.com/stretchr/testify/require"
 )
@@ -701,21 +697,7 @@ func BenchmarkFetchTagValues(b *testing.B) {
     }
 
     ctx := context.TODO()
-    tenantID := "1"
-    // blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
-    blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
-
-    r, _, _, err := local.New(&local.Config{
-        // Path: path.Join("/Users/marty/src/tmp/"),
-        Path: path.Join("/Users/joe/testblock"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
-    block := newBackendBlock(meta, rr)
+    block := blockForBenchmarks(b)
     opts := common.DefaultSearchOptions()
 
     for _, tc := range testCases {
@@ -782,21 +764,7 @@ func BenchmarkFetchTags(b *testing.B) {
     }
 
     ctx := context.TODO()
-    tenantID := "1"
-    // blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
-    blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
-
-    r, _, _, err := local.New(&local.Config{
-        // Path: path.Join("/Users/marty/src/tmp/"),
-        Path: path.Join("/Users/joe/testblock"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
-    block := newBackendBlock(meta, rr)
+    block := blockForBenchmarks(b)
     opts := common.DefaultSearchOptions()
 
     for _, tc := range testCases {
@@ -146,7 +146,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
 
 func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.Trace, error) {
     // traceID column index
-    colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
+    colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
     if colIndex == -1 {
         return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
     }
@@ -237,7 +237,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
     }
 
     // Now iterate the matching row group
-    iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
+    iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
     defer iter.Close()
 
     res, err := iter.Next()
@@ -144,52 +144,10 @@ func TestBackendBlockFindTraceByID_TestData(t *testing.T) {
     }
 }
 
-/*func genIndex(t require.TestingT, block *backendBlock) *index {
-    pf, _, err := block.openForSearch(context.TODO(), common.DefaultSearchOptions())
-    require.NoError(t, err)
-
-    i := &index{}
-
-    for j := range pf.RowGroups() {
-        iter := parquetquery.NewSyncIterator(context.TODO(), pf.RowGroups()[j:j+1], 0, "", 1000, nil, "TraceID")
-        defer iter.Close()
-
-        for {
-            v, err := iter.Next()
-            require.NoError(t, err)
-            if v == nil {
-                break
-            }
-
-            i.Add(v.Entries[0].Value.ByteArray())
-        }
-        i.Flush()
-    }
-
-    return i
-}*/
-
 func BenchmarkFindTraceByID(b *testing.B) {
-    var (
-        ctx      = context.TODO()
-        tenantID = "1"
-        blockID  = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
-        path     = "/Users/marty/src/tmp/"
-    )
-
-    r, _, _, err := local.New(&local.Config{
-        Path: path,
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    // ww := backend.NewWriter(w)
-
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
+    ctx := context.TODO()
     traceID := []byte{}
-    block := newBackendBlock(meta, rr)
+    block := blockForBenchmarks(b)
 
     // index := genIndex(b, block)
     // writeBlockMeta(ctx, ww, meta, &common.ShardedBloomFilter{}, index)
@@ -41,7 +41,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
         return nil, err
     }
 
-    traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
+    traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
     if traceIDIndex < 0 {
         return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
     }
@@ -357,14 +357,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
     async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
 
     return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
-        index, _ := pq.GetColumnIndexByPath(pf, name)
+        index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
         if index == -1 {
             // TODO - don't panic, error instead
            panic("column not found in parquet file:" + name)
         }
 
         if async {
-            return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
+            return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
         }
 
         var opts []pq.SyncIteratorOpt
@@ -372,7 +372,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
             opts = append(opts, pq.SyncIteratorOptIntern())
         }
 
-        return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
+        return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
     }
 }
@@ -71,11 +71,11 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
     specialAttrIdxs := map[int]string{}
 
     // standard attributes
-    resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
+    resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
 
     // special attributes
     for lbl, col := range specialMappings {
-        idx, _ := pq.GetColumnIndexByPath(pf, col)
+        idx, _, _ := pq.GetColumnIndexByPath(pf, col)
         if idx == -1 {
             continue
         }
@@ -85,7 +85,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
 
     // dedicated attributes
     columnMapping.forEach(func(lbl string, c dedicatedColumn) {
-        idx, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
+        idx, _, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
         if idx == -1 {
             return
         }
@@ -2,14 +2,10 @@ package vparquet4
 
 import (
     "context"
-    "path"
     "testing"
 
-    "github.com/google/uuid"
     "github.com/grafana/tempo/pkg/collector"
     "github.com/grafana/tempo/pkg/traceql"
-    "github.com/grafana/tempo/tempodb/backend"
-    "github.com/grafana/tempo/tempodb/backend/local"
     "github.com/grafana/tempo/tempodb/encoding/common"
     "github.com/stretchr/testify/assert"
     "github.com/stretchr/testify/require"
@@ -189,19 +185,8 @@ func TestBackendBlockSearchTagValuesV2(t *testing.T) {
 
 func BenchmarkBackendBlockSearchTags(b *testing.B) {
     ctx := context.TODO()
-    tenantID := "1"
-    blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
-
-    r, _, _, err := local.New(&local.Config{
-        Path: path.Join("/Users/marty/src/tmp/"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
-    block := newBackendBlock(meta, rr)
+    block := blockForBenchmarks(b)
     opts := common.DefaultSearchOptions()
     d := collector.NewDistinctString(1_000_000, 0, 0)
     mc := collector.NewMetricsCollector()
@@ -221,19 +206,7 @@ func BenchmarkBackendBlockSearchTagValues(b *testing.B) {
     }
 
     ctx := context.TODO()
-    tenantID := "1"
-    blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
-
-    r, _, _, err := local.New(&local.Config{
-        Path: path.Join("/Users/marty/src/tmp/"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
-    block := newBackendBlock(meta, rr)
+    block := blockForBenchmarks(b)
     opts := common.DefaultSearchOptions()
 
     for _, tc := range testCases {
@@ -3,7 +3,6 @@ package vparquet4
 
 import (
     "context"
    "math/rand"
-    "path"
     "testing"
     "time"
@@ -412,19 +411,7 @@ func BenchmarkBackendBlockSearchTraces(b *testing.B) {
     }
 
     ctx := context.TODO()
-    tenantID := "1"
-    blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
-
-    r, _, _, err := local.New(&local.Config{
-        Path: path.Join("/Users/marty/src/tmp/"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
-    block := newBackendBlock(meta, rr)
+    block := blockForBenchmarks(b)
 
     opts := common.DefaultSearchOptions()
     opts.StartPage = 10
@@ -6,7 +6,6 @@ import (
     "fmt"
     "math/rand"
     "os"
-    "path"
     "sort"
     "strconv"
     "strings"
@@ -17,6 +16,7 @@ import (
     "github.com/stretchr/testify/require"
 
+    "github.com/grafana/tempo/pkg/parquetquery"
     pq "github.com/grafana/tempo/pkg/parquetquery"
     "github.com/grafana/tempo/pkg/tempopb"
     v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
     "github.com/grafana/tempo/pkg/traceql"
@@ -957,28 +957,13 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
     }
 
     ctx := context.TODO()
-    tenantID := "1"
-    // blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
-    // blockID := uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
-    blockID := uuid.MustParse("030c8c4f-9d47-4916-aadc-26b90b1d2bc4")
-
-    r, _, _, err := local.New(&local.Config{
-        // Path: path.Join("/Users/marty/src/tmp"),
-        // Path: path.Join("/Users/mapno/workspace/testblock"),
-        Path: path.Join("/Users/joe/testblock"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-
     opts := common.DefaultSearchOptions()
     opts.StartPage = 3
     opts.TotalPages = 2
 
-    block := newBackendBlock(meta, rr)
-    _, _, err = block.openForSearch(ctx, opts)
+    block := blockForBenchmarks(b)
+
+    _, _, err := block.openForSearch(ctx, opts)
     require.NoError(b, err)
 
     for _, tc := range testCases {
@@ -1016,27 +1001,12 @@ func BenchmarkBackendBlockGetMetrics(b *testing.B) {
     }
 
     ctx := context.TODO()
-    tenantID := "1"
-    // blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
-    blockID := uuid.MustParse("257e3a56-224a-4ebe-9696-1b304f456ac2")
-
-    r, _, _, err := local.New(&local.Config{
-        // Path: path.Join("/Users/marty/src/tmp/"),
-        Path: path.Join("/Users/suraj/wd/grafana/testblock"),
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-    require.Equal(b, VersionString, meta.Version)
-
     opts := common.DefaultSearchOptions()
     opts.StartPage = 10
     opts.TotalPages = 10
 
-    block := newBackendBlock(meta, rr)
-    _, _, err = block.openForSearch(ctx, opts)
+    block := blockForBenchmarks(b)
+    _, _, err := block.openForSearch(ctx, opts)
     require.NoError(b, err)
 
     for _, tc := range testCases {
@@ -1057,6 +1027,71 @@ func BenchmarkBackendBlockGetMetrics(b *testing.B) {
     }
 }
 
+// BenchmarkIterators is a convenient method to run benchmarks on various iterator constructions directly when working on optimizations.
+// Replace the iterator at the beginning of the benchmark loop with any combination desired.
+func BenchmarkIterators(b *testing.B) {
+    ctx := context.TODO()
+    opts := common.DefaultSearchOptions()
+    opts.StartPage = 3
+    opts.TotalPages = 2
+
+    block := blockForBenchmarks(b)
+    pf, _, err := block.openForSearch(ctx, opts)
+    require.NoError(b, err)
+
+    rgs := pf.RowGroups()
+    rgs = rgs[3:5]
+
+    var instrPred *parquetquery.InstrumentedPredicate
+    makeIterInternal := makeIterFunc(ctx, rgs, pf)
+    makeIter := func(columnName string, predicate pq.Predicate, selectAs string) pq.Iterator {
+        instrPred = &parquetquery.InstrumentedPredicate{
+            Pred: predicate,
+        }
+
+        return makeIterInternal(columnName, predicate, selectAs)
+    }
+
+    b.ResetTimer()
+    for i := 0; i < b.N; i++ {
+        err := error(nil)
+
+        iter := makeIter(columnPathSpanAttrKey, parquetquery.NewSubstringPredicate("e"), "foo")
+
+        // parquetquery.NewUnionIterator(DefinitionLevelResourceSpansILSSpanAttrs, []parquetquery.Iterator{
+        //     makeIter(columnPathSpanHTTPStatusCode, parquetquery.NewIntEqualPredicate(500), "http_status"),
+        //     makeIter(columnPathSpanName, parquetquery.NewStringEqualPredicate([]byte("foo")), "name"),
+        //     makeIter(columnPathSpanStatusCode, parquetquery.NewIntEqualPredicate(2), "status"),
+        //     makeIter(columnPathSpanAttrDouble, parquetquery.NewFloatEqualPredicate(500), "double"),
+        //     makeIter(columnPathSpanAttrInt, parquetquery.NewIntEqualPredicate(500), "int"),
+        // }, nil)
+        require.NoError(b, err)
+        // fmt.Println(iter.String())
+
+        count := 0
+        for {
+            res, err := iter.Next()
+            if err != nil {
+                panic(err)
+            }
+            if res == nil {
+                break
+            }
+            count++
+        }
+        iter.Close()
+        if instrPred != nil {
+            b.ReportMetric(float64(count), "count")
+            b.ReportMetric(float64(instrPred.InspectedColumnChunks), "stats_cc")
+            b.ReportMetric(float64(instrPred.KeptColumnChunks), "stats_cc_kept")
+            b.ReportMetric(float64(instrPred.InspectedPages), "stats_ip")
+            b.ReportMetric(float64(instrPred.KeptPages), "stats_ip_kept")
+            b.ReportMetric(float64(instrPred.InspectedValues), "stats_v")
+            b.ReportMetric(float64(instrPred.KeptValues), "stats_v_kept")
+        }
+    }
+}
+
 func BenchmarkBackendBlockQueryRange(b *testing.B) {
     testCases := []string{
         "{} | rate()",
@@ -1067,32 +1102,13 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
         "{status=error} | rate()",
     }
 
-    var (
-        ctx      = context.TODO()
-        e        = traceql.NewEngine()
-        tenantID = "1"
-        // blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
-        // blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
-        blockID = uuid.MustParse("257e3a56-224a-4ebe-9696-1b304f456ac2")
-        // path = "/Users/marty/src/tmp/"
-        // path = "/Users/mapno/workspace/testblock"
-        path = "/Users/suraj/wd/grafana/testblock"
-    )
-
-    r, _, _, err := local.New(&local.Config{
-        Path: path,
-    })
-    require.NoError(b, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(b, err)
-    require.Equal(b, VersionString, meta.Version)
-
+    e := traceql.NewEngine()
+    ctx := context.TODO()
     opts := common.DefaultSearchOptions()
     opts.TotalPages = 10
-    block := newBackendBlock(meta, rr)
-    _, _, err = block.openForSearch(ctx, opts)
+
+    block := blockForBenchmarks(b)
+    _, _, err := block.openForSearch(ctx, opts)
     require.NoError(b, err)
 
     f := traceql.NewSpansetFetcherWrapper(func(ctx context.Context, req traceql.FetchSpansRequest) (traceql.FetchSpansResponse, error) {
@@ -1103,10 +1119,10 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
         b.Run(tc, func(b *testing.B) {
             for _, minutes := range []int{5, 7} {
                 b.Run(strconv.Itoa(minutes), func(b *testing.B) {
-                    st := meta.StartTime
+                    st := block.meta.StartTime
                     end := st.Add(time.Duration(minutes) * time.Minute)
 
-                    if end.After(meta.EndTime) {
+                    if end.After(block.meta.EndTime) {
                         b.SkipNow()
                         return
                     }
@@ -1137,92 +1153,6 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
     }
 }
 
-// TestBackendBlockQueryRange is the `TestOne` of metric queries.
-// It's skipped because it depends on a local block, like benchmarks
-//
-// You also need to manually print the iterator in `backendBlock.Fetch`,
-// because there is no access to the iterator in the test. Sad.
-func TestBackendBlockQueryRange(t *testing.T) {
-    if os.Getenv("debug") != "1" {
-        t.Skip()
-    }
-
-    testCases := []string{
-        "{} | rate()",
-        "{} | rate() by (name)",
-        "{} | rate() by (resource.service.name)",
-        "{} | rate() by (span.http.url)", // High cardinality attribute
-        "{resource.service.name=`tempo-ingester`} | rate()",
-        "{status=unset} | rate()",
-    }
-
-    const (
-        tenantID  = "1"
-        queryHint = "with(exemplars=true)"
-    )
-
-    var (
-        ctx     = context.TODO()
-        e       = traceql.NewEngine()
-        opts    = common.DefaultSearchOptions()
-        blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
-        path    = path.Join("/Users/mapno/workspace/testblock")
-    )
-
-    r, _, _, err := local.New(&local.Config{
-        Path: path,
-    })
-    require.NoError(t, err)
-
-    rr := backend.NewReader(r)
-    meta, err := rr.BlockMeta(ctx, blockID, tenantID)
-    require.NoError(t, err)
-    require.Equal(t, VersionString, meta.Version)
-
-    block := newBackendBlock(meta, rr)
-    opts.TotalPages = 10
-    _, _, err = block.openForSearch(ctx, opts)
-    require.NoError(t, err)
-
-    f := traceql.NewSpansetFetcherWrapper(func(ctx context.Context, req traceql.FetchSpansRequest) (traceql.FetchSpansResponse, error) {
-        return block.Fetch(ctx, req, opts)
-    })
-
-    for _, tc := range testCases {
-        t.Run(tc, func(t *testing.T) {
-            st := meta.StartTime
-            end := st.Add(time.Duration(5) * time.Minute)
-
-            if end.After(meta.EndTime) {
-                t.SkipNow()
-                return
-            }
-
-            req := &tempopb.QueryRangeRequest{
-                Query: fmt.Sprintf("%s %s", tc, queryHint),
-                Step:  uint64(time.Minute),
-                Start: uint64(st.UnixNano()),
-                End:   uint64(end.UnixNano()),
-            }
-
-            eval, err := e.CompileMetricsQueryRange(req, 1, 0, false)
-            require.NoError(t, err)
-
-            require.NoError(t, eval.Do(ctx, f, uint64(block.meta.StartTime.UnixNano()), uint64(block.meta.EndTime.UnixNano())))
-
-            ss := eval.Results()
-            require.NotNil(t, ss)
-
-            for _, s := range ss {
-                if s.Exemplars != nil && len(s.Exemplars) > 0 {
-                    fmt.Println("series", s.Labels)
-                    fmt.Println("Exemplars", s.Exemplars)
-                }
-            }
-        })
-    }
-}
-
 func ptr[T any](v T) *T {
     return &v
 }
@@ -2078,3 +2008,33 @@ func randomTree(N int) []traceql.Span {
 
     return nodes
 }
+
+func blockForBenchmarks(b *testing.B) *backendBlock {
+    id, ok := os.LookupEnv("BENCH_BLOCKID")
+    if !ok {
+        b.Fatal("BENCH_BLOCKID is not set. These benchmarks are designed to run against a block on local disk. Set BENCH_BLOCKID to the guid of the block to run benchmarks against. e.g. `export BENCH_BLOCKID=030c8c4f-9d47-4916-aadc-26b90b1d2bc4`")
+    }
+
+    path, ok := os.LookupEnv("BENCH_PATH")
+    if !ok {
+        b.Fatal("BENCH_PATH is not set. These benchmarks are designed to run against a block on local disk. Set BENCH_PATH to the root of the backend such that the block to benchmark is at <BENCH_PATH>/<BENCH_TENANTID>/<BENCH_BLOCKID>.")
+    }
+
+    tenantID, ok := os.LookupEnv("BENCH_TENANTID")
+    if !ok {
+        tenantID = "1"
+    }
+
+    blockID := uuid.MustParse(id)
+    r, _, _, err := local.New(&local.Config{
+        Path: path,
+    })
+    require.NoError(b, err)
+
+    rr := backend.NewReader(r)
+    meta, err := rr.BlockMeta(context.Background(), blockID, tenantID)
+    require.NoError(b, err)
+
+    return newBackendBlock(meta, rr)
+}
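With this helper, the vparquet4 benchmarks select their block via environment variables instead of the hardcoded developer paths they replace. An assumed invocation (the block ID, tenant, path, and bench name are placeholders, not values from this commit): `BENCH_PATH=/var/tempo BENCH_TENANTID=1 BENCH_BLOCKID=030c8c4f-9d47-4916-aadc-26b90b1d2bc4 go test -bench=BenchmarkBackendBlockTraceQL ./tempodb/encoding/vparquet4`.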
@@ -256,7 +256,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
 
     pf := file.parquetFile
 
-    idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
+    idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
     r := parquet.NewReader(pf)
     return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
 }