Do less Nexting (#4753)

* this is garbage

Signed-off-by: Joe Elliott <number101010@gmail.com>

* filtery stuff

Signed-off-by: Joe Elliott <number101010@gmail.com>

* fix

Signed-off-by: Joe Elliott <number101010@gmail.com>

* max def everywhere

Signed-off-by: Joe Elliott <number101010@gmail.com>

* clean up benches

Signed-off-by: Joe Elliott <number101010@gmail.com>

* clean up

Signed-off-by: Joe Elliott <number101010@gmail.com>

* remove vendor changes

Signed-off-by: Joe Elliott <number101010@gmail.com>

* changelog

Signed-off-by: Joe Elliott <number101010@gmail.com>

* add details about bench env vars

Signed-off-by: Joe Elliott <number101010@gmail.com>

---------

Signed-off-by: Joe Elliott <number101010@gmail.com>
Joe Elliott
2025-02-28 08:08:18 -05:00
committed by GitHub
parent eb960ceb57
commit c1f6280dd1
31 changed files with 220 additions and 827 deletions


@ -34,6 +34,7 @@ configurable via the throughput_bytes_slo field, and it will populate op="traces
* [ENHANCEMENT] Improve block-builder performance [#4596](https://github.com/grafana/tempo/pull/4596) (@mdisibio)
* [ENHANCEMENT] Improve block-builder performance by not using WAL stage [#4647](https://github.com/grafana/tempo/pull/4647) [#4671](https://github.com/grafana/tempo/pull/4671) (@mdisibio)
* [ENHANCEMENT] Export new `tempo_ingest_group_partition_lag` metric from block-builders and metrics-generators [#4571](https://github.com/grafana/tempo/pull/4571) (@mdisibio)
* [ENHANCEMENT] Overall iterator performance improvement by using max definition level to ignore parts of the RowNumber while nexting (see the sketch just below this list). [#4753](https://github.com/grafana/tempo/pull/4753) (@joe-elliott)
* [ENHANCEMENT] Use distroless base container images for improved security [#4556](https://github.com/grafana/tempo/pull/4556) (@carles-grafana)
* [ENHANCEMENT] rhythm: add block builder to resources dashboard [#4669](https://github.com/grafana/tempo/pull/4669) (@javiermolinar)
* [ENHANCEMENT] update dskit to latest version [#4681](https://github.com/grafana/tempo/pull/4681) (@javiermolinar)
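
For context on the enhancement above: a RowNumber tracks a value's position at each nesting level, and Next previously reset every level past the definition level on every single value. A minimal sketch of the change, with simplified declarations (the exact Tempo types may differ slightly):

// Sketch only: RowNumber models one value's position per nesting level.
type RowNumber [8]int32

// Next advances the row number for a value with the given repetition and
// definition levels. Levels past the definition level are undefined (-1),
// but a column can never be defined deeper than its own max definition
// level, so the reset loop now stops there instead of walking the array.
func (t *RowNumber) Next(repetitionLevel, definitionLevel, maxDefinitionLevel int) {
	t[repetitionLevel]++

	// New children up through the definition level
	for i := repetitionLevel + 1; i <= definitionLevel; i++ {
		t[i] = 0
	}

	// Levels past the definition level are undefined, capped at the max
	for i := definitionLevel + 1; i < len(t) && i <= maxDefinitionLevel; i++ {
		t[i] = -1
	}
}

For shallow columns (a max definition level of 1 or 2) this skips most of the eight-element array on every value, which is also why the unrolled switch version below could be deleted outright.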


@ -240,10 +240,10 @@ type attribute struct {
}
func aggregateAttributes(pf *parquet.File, keyPath string, valuePaths []string) (genericAttrSummary, error) {
keyIdx, _ := pq.GetColumnIndexByPath(pf, keyPath)
keyIdx, _, _ := pq.GetColumnIndexByPath(pf, keyPath)
valueIdxs := make([]int, 0, len(valuePaths))
for _, v := range valuePaths {
idx, _ := pq.GetColumnIndexByPath(pf, v)
idx, _, _ := pq.GetColumnIndexByPath(pf, v)
valueIdxs = append(valueIdxs, idx)
}
@ -311,7 +311,7 @@ func aggregateDedicatedColumns(pf *parquet.File, scope backend.DedicatedColumnSc
}
func aggregateColumn(pf *parquet.File, colName string) (uint64, error) {
idx, _ := pq.GetColumnIndexByPath(pf, colName)
idx, _, _ := pq.GetColumnIndexByPath(pf, colName)
calc, err := inspect.NewRowStatCalculator(pf, inspect.RowStatOptions{
Columns: []int{idx},
})


@ -38,7 +38,7 @@ func (cmd *listColumnCmd) Run(ctx *globalOptions) error {
return err
}
colIndex, _ := pq.GetColumnIndexByPath(pf, cmd.Column)
colIndex, _, _ := pq.GetColumnIndexByPath(pf, cmd.Column)
for i, rg := range pf.RowGroups() {


@ -126,7 +126,7 @@ func getAllTraceIDs(t *testing.T, dir string, tenant string) []string {
err := r.Close()
require.NoError(t, err)
}()
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, vparquet4.TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, vparquet4.TraceIDColumnName)
require.GreaterOrEqual(t, traceIDIndex, 0)
defer func() {
err := r.Close()


@ -128,429 +128,7 @@ func (t *RowNumber) Valid() bool {
// null | 1 | 1 | { 0, 1, -1, -1 }
// gb | 1 | 3 | { 0, 2, 0, 0 }
// null | 0 | 1 | { 1, 0, -1, -1 }
func (t *RowNumber) Next(repetitionLevel, definitionLevel int) {
t[repetitionLevel]++
// the following is nextSlow() unrolled
switch repetitionLevel {
case 0:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[1] = 0
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[1] = 0
t[2] = 0
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 1:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[2] = 0
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[2] = 0
t[3] = 0
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 2:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[3] = 0
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[3] = 0
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 3:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 4:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 5:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[6] = -1
t[7] = -1
case 6:
t[6] = 0
t[7] = -1
case 7:
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 6:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[6] = -1
t[7] = -1
case 6:
t[7] = -1
case 7:
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 7:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[6] = -1
t[7] = -1
case 6:
t[7] = -1
case 7:
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
}
}
// nextSlow is the original implementation of next. it is kept to test against
// the unrolled version above
func (t *RowNumber) nextSlow(repetitionLevel, definitionLevel int) {
func (t *RowNumber) Next(repetitionLevel, definitionLevel, maxDefinitionLevel int) {
t[repetitionLevel]++
// New children up through the definition level
@ -559,7 +137,7 @@ func (t *RowNumber) nextSlow(repetitionLevel, definitionLevel int) {
}
// Children past the definition level are undefined
for i := definitionLevel + 1; i < len(t); i++ {
for i := definitionLevel + 1; i < len(t) && i <= maxDefinitionLevel; i++ {
t[i] = -1
}
}
@ -803,13 +381,15 @@ type SyncIterator struct {
currPageN int
at IteratorResult // Current value pointed at by iterator. Returned by call Next and SeekTo, valid until next call.
maxDefinitionLevel int
intern bool
interner *intern.Interner
}
var _ Iterator = (*SyncIterator)(nil)
func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, opts ...SyncIteratorOpt) *SyncIterator {
func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, maxDefinitionLevel int, opts ...SyncIteratorOpt) *SyncIterator {
// Assign row group bounds.
// Lower bound is inclusive
// Upper bound is exclusive, points at the first row of the next group
@ -841,16 +421,17 @@ func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnN
// Create the iterator
i := &SyncIterator{
span: span,
column: column,
columnName: columnName,
rgs: rgs,
readSize: readSize,
rgsMin: rgsMin,
rgsMax: rgsMax,
filter: filter,
curr: EmptyRowNumber(),
at: at,
span: span,
column: column,
columnName: columnName,
rgs: rgs,
readSize: readSize,
rgsMin: rgsMin,
rgsMax: rgsMax,
filter: filter,
curr: EmptyRowNumber(),
at: at,
maxDefinitionLevel: maxDefinitionLevel,
}
// Apply options
@ -1160,7 +741,7 @@ func (c *SyncIterator) next() (RowNumber, *pq.Value, error) {
// Inspect all values to track the current row number,
// even if the value is filtered out next.
c.curr.Next(v.RepetitionLevel(), v.DefinitionLevel())
c.curr.Next(v.RepetitionLevel(), v.DefinitionLevel(), c.maxDefinitionLevel)
c.currBufN++
c.currPageN++
@ -1260,11 +841,12 @@ func (c *SyncIterator) Close() {
// the optional predicate to each chunk, page, and value. Results are read by calling
// Next() until it returns nil.
type ColumnIterator struct {
rgs []pq.RowGroup
col int
colName string
filter *InstrumentedPredicate
selectAs string
rgs []pq.RowGroup
col int
colName string
filter *InstrumentedPredicate
selectAs string
maxDefinitionLevel int
// Row number to seek to, protected by mutex.
// Less allocs than storing in atomic.Value
@ -1288,16 +870,17 @@ type columnIteratorBuffer struct {
values []pq.Value
}
func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string) *ColumnIterator {
func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, maxDefinitionLevel int) *ColumnIterator {
c := &ColumnIterator{
rgs: rgs,
col: column,
colName: columnName,
filter: &InstrumentedPredicate{pred: filter},
selectAs: selectAs,
quit: make(chan struct{}),
ch: make(chan *columnIteratorBuffer, 1),
currN: -1,
rgs: rgs,
col: column,
colName: columnName,
filter: &InstrumentedPredicate{Pred: filter},
selectAs: selectAs,
quit: make(chan struct{}),
ch: make(chan *columnIteratorBuffer, 1),
currN: -1,
maxDefinitionLevel: maxDefinitionLevel,
}
c.iter = func() { c.iterate(ctx, readSize) }
@ -1417,7 +1000,7 @@ func (c *ColumnIterator) iterate(ctx context.Context, readSize int) {
// We have to do this for all values (even if the
// value is excluded by the predicate)
rn.Next(v.RepetitionLevel(), v.DefinitionLevel())
rn.Next(v.RepetitionLevel(), v.DefinitionLevel(), c.maxDefinitionLevel)
if c.filter != nil {
if !c.filter.KeepValue(v) {
@ -2015,7 +1598,6 @@ func (u *UnionIterator) Next() (*IteratorResult, error) {
if err != nil {
return nil, fmt.Errorf("union iterator peek failed: %w", err)
}
// If this iterator is exhausted go to the next one
if rn == nil {
continue


@ -3,7 +3,6 @@ package parquetquery
import (
"context"
"math"
"math/rand"
"os"
"strconv"
"testing"
@ -19,30 +18,13 @@ var iterTestCases = []struct {
makeIter makeTestIterFn
}{
{"async", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator {
return NewColumnIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs)
return NewColumnIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs, MaxDefinitionLevel)
}},
{"sync", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator {
return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs)
return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs, MaxDefinitionLevel)
}},
}
// TestNext compares the unrolled Next() with the original nextSlow() to
// prevent drift
func TestNext(t *testing.T) {
rn1 := RowNumber{0, 0, 0, 0, 0, 0, 0, 0}
rn2 := RowNumber{0, 0, 0, 0, 0, 0, 0, 0}
for i := 0; i < 1000; i++ {
r := rand.Intn(MaxDefinitionLevel + 1)
d := rand.Intn(MaxDefinitionLevel + 1)
rn1.Next(r, d)
rn2.nextSlow(r, d)
require.Equal(t, rn1, rn2)
}
}
// TestTruncate compares the unrolled TruncateRowNumber() with the original truncateRowNumberSlow() to
// prevent drift
func TestTruncateRowNumber(t *testing.T) {
@ -73,44 +55,26 @@ func TestInvalidDefinitionLevelTruncate(t *testing.T) {
})
}
func TestInvalidDefinitionLevelNext(t *testing.T) {
t.Run("Next -1", func(t *testing.T) {
assertPanic(t, func() {
rn := RowNumber{1, 2, 3, 4, 5, 6, 7, 8}
r := 0
d := -1
rn.Next(r, d)
})
})
t.Run("Next Max+1", func(t *testing.T) {
assertPanic(t, func() {
rn := RowNumber{1, 2, 3, 4, 5, 6, 7, 8}
r := 0
d := MaxDefinitionLevel + 1
rn.Next(r, d)
})
})
}
func TestRowNumber(t *testing.T) {
func TestRowNumberNext(t *testing.T) {
tr := EmptyRowNumber()
require.Equal(t, RowNumber{-1, -1, -1, -1, -1, -1, -1, -1}, tr)
steps := []struct {
repetitionLevel int
definitionLevel int
expected RowNumber
repetitionLevel int
definitionLevel int
maxDefinitionLevel int
expected RowNumber
}{
// Name.Language.Country examples from the Dremel whitepaper
{0, 3, RowNumber{0, 0, 0, 0, -1, -1, -1, -1}},
{2, 2, RowNumber{0, 0, 1, -1, -1, -1, -1, -1}},
{1, 1, RowNumber{0, 1, -1, -1, -1, -1, -1, -1}},
{1, 3, RowNumber{0, 2, 0, 0, -1, -1, -1, -1}},
{0, 1, RowNumber{1, 0, -1, -1, -1, -1, -1, -1}},
{0, 3, 3, RowNumber{0, 0, 0, 0, -1, -1, -1, -1}},
{2, 2, 3, RowNumber{0, 0, 1, -1, -1, -1, -1, -1}},
{1, 1, 3, RowNumber{0, 1, -1, -1, -1, -1, -1, -1}},
{1, 3, 3, RowNumber{0, 2, 0, 0, -1, -1, -1, -1}},
{0, 1, 3, RowNumber{1, 0, -1, -1, -1, -1, -1, -1}},
}
for _, step := range steps {
tr.Next(step.repetitionLevel, step.definitionLevel)
tr.Next(step.repetitionLevel, step.definitionLevel, step.maxDefinitionLevel)
require.Equal(t, step.expected, tr)
}
}
@ -158,7 +122,7 @@ func testColumnIterator(t *testing.T, makeIter makeTestIterFn) {
count := 100_000
pf := createTestFile(t, count)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
iter := makeIter(pf, idx, nil, "A")
defer iter.Close()
@ -187,7 +151,7 @@ func testColumnIteratorSeek(t *testing.T, makeIter makeTestIterFn) {
count := 10_000
pf := createTestFile(t, count)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
iter := makeIter(pf, idx, nil, "A")
defer iter.Close()
@ -224,7 +188,7 @@ func testColumnIteratorPredicate(t *testing.T, makeIter makeTestIterFn) {
pred := NewIntBetweenPredicate(7001, 7003)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
iter := makeIter(pf, idx, pred, "A")
defer iter.Close()
@ -253,7 +217,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
}
pf := createFileWith(t, rows)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
readSize := 1000
readIter := func(iter Iterator) (int, error) {
@ -275,7 +239,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
// Cancel before iterating
ctx, cancel := context.WithCancel(context.TODO())
cancel()
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
count, err := readIter(iter)
require.ErrorContains(t, err, "context canceled")
require.Equal(t, 0, count)
@ -283,7 +247,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
t.Run("cancelledPartial", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
// Read some results
_, err := iter.Next()
@ -299,7 +263,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
t.Run("closedEarly", func(t *testing.T) {
// Close before iterating
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
iter.Close()
count, err := readIter(iter)
require.NoError(t, err)
@ -307,7 +271,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
})
t.Run("closedPartial", func(t *testing.T) {
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
// Read some results
_, err := iter.Next()
@ -335,7 +299,7 @@ func benchmarkColumnIterator(b *testing.B, makeIter makeTestIterFn) {
count := 100_000
pf := createTestFile(b, count)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
b.ResetTimer()


@ -237,9 +237,9 @@ func testPredicate(t *testing.T, tc predicateTestCase) {
r, err := parquet.OpenFile(file, int64(buf.Len()))
require.NoError(t, err)
p := InstrumentedPredicate{pred: tc.predicate}
p := InstrumentedPredicate{Pred: tc.predicate}
i := NewColumnIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "")
i := NewColumnIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "", MaxDefinitionLevel)
for {
res, err := i.Next()
require.NoError(t, err)


@ -372,7 +372,7 @@ func (p *OrPredicate) KeepValue(v pq.Value) bool {
}
type InstrumentedPredicate struct {
pred Predicate // Optional, if missing then just keeps metrics with no filtering
Pred Predicate // Optional, if missing then just keeps metrics with no filtering
InspectedColumnChunks int64
InspectedPages int64
InspectedValues int64
@ -384,16 +384,16 @@ type InstrumentedPredicate struct {
var _ Predicate = (*InstrumentedPredicate)(nil)
func (p *InstrumentedPredicate) String() string {
if p.pred == nil {
if p.Pred == nil {
return fmt.Sprintf("InstrumentedPredicate{%d, nil}", p.InspectedValues)
}
return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.pred)
return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.Pred)
}
func (p *InstrumentedPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool {
p.InspectedColumnChunks++
if p.pred == nil || p.pred.KeepColumnChunk(c) {
if p.Pred == nil || p.Pred.KeepColumnChunk(c) {
p.KeptColumnChunks++
return true
}
@ -404,7 +404,7 @@ func (p *InstrumentedPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool {
func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
p.InspectedPages++
if p.pred == nil || p.pred.KeepPage(page) {
if p.Pred == nil || p.Pred.KeepPage(page) {
p.KeptPages++
return true
}
@ -415,7 +415,7 @@ func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
func (p *InstrumentedPredicate) KeepValue(v pq.Value) bool {
p.InspectedValues++
if p.pred == nil || p.pred.KeepValue(v) {
if p.Pred == nil || p.Pred.KeepValue(v) {
p.KeptValues++
return true
}


@ -6,23 +6,23 @@ import (
pq "github.com/parquet-go/parquet-go"
)
func GetColumnIndexByPath(pf *pq.File, s string) (index, depth int) {
func GetColumnIndexByPath(pf *pq.File, s string) (index, depth, maxDef int) {
colSelector := strings.Split(s, ".")
n := pf.Root()
for len(colSelector) > 0 {
n = n.Column(colSelector[0])
if n == nil {
return -1, -1
return -1, -1, -1
}
colSelector = colSelector[1:]
depth++
}
return n.Index(), depth
return n.Index(), depth, n.MaxDefinitionLevel()
}
func HasColumn(pf *pq.File, s string) bool {
index, _ := GetColumnIndexByPath(pf, s)
index, _, _ := GetColumnIndexByPath(pf, s)
return index >= 0
}
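
Since GetColumnIndexByPath now returns the column's max definition level as a third value, every call site threads it into the iterator constructors. A condensed sketch of the pattern repeated throughout this commit (traceIDIterator is a hypothetical helper; pq aliases github.com/grafana/tempo/pkg/parquetquery as in the vparquet packages):

// Hypothetical helper showing the new three-value call pattern.
func traceIDIterator(ctx context.Context, pf *parquet.File) (pq.Iterator, error) {
	colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
	if colIndex == -1 {
		return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
	}
	// The max definition level rides along with the column index.
	return pq.NewColumnIterator(ctx, pf.RowGroups(), colIndex, "", 1000, nil, "", maxDef), nil
}

Callers that previously discarded one extra return (idx, _ :=) now discard two (idx, _, _ :=) when they don't need the level.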


@ -139,7 +139,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.TraceByIDResponse, error) {
// traceID column index
colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
if colIndex == -1 {
return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
}
@ -227,7 +227,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
}
// Now iterate the matching row group
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
defer iter.Close()
res, err := iter.Next()


@ -34,7 +34,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
return nil, err
}
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
if traceIDIndex < 0 {
return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
}


@ -356,14 +356,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
index, _ := pq.GetColumnIndexByPath(pf, name)
index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
if index == -1 {
// TODO - don't panic, error instead
panic("column not found in parquet file:" + name)
}
if async {
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
}
var opts []pq.SyncIteratorOpt
@ -371,7 +371,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
opts = append(opts, pq.SyncIteratorOptIntern())
}
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
}
}


@ -69,14 +69,14 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
specialAttrIdxs := map[int]string{}
// standard resource attributes
resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
if resourceKeyIdx == -1 {
return fmt.Errorf("resource attributes col not found (%d)", resourceKeyIdx)
}
// special resource attributes
for lbl, col := range specialMappings {
idx, _ := pq.GetColumnIndexByPath(pf, col)
idx, _, _ := pq.GetColumnIndexByPath(pf, col)
if idx == -1 {
continue
}


@ -244,7 +244,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
pf := file.parquetFile
idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
r := parquet.NewReader(pf)
return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
}


@ -99,7 +99,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d
// - use rep/def levels to determine if a value exists at a row w/o actually testing values.
// atm i believe this requires reading the pages themselves b/c the rep/def lvls come w/ the page
hasValues := func(path string, pf *parquet.File) bool {
idx, _ := parquetquery.GetColumnIndexByPath(pf, path)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, path)
md := pf.Metadata()
for _, rg := range md.RowGroups {
col := rg.Columns[idx]


@ -139,7 +139,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.TraceByIDResponse, error) {
// traceID column index
colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
if colIndex == -1 {
return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
}
@ -230,7 +230,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
}
// Now iterate the matching row group
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
defer iter.Close()
res, err := iter.Next()


@ -41,7 +41,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
return nil, err
}
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
if traceIDIndex < 0 {
return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
}


@ -354,14 +354,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
index, _ := pq.GetColumnIndexByPath(pf, name)
index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
if index == -1 {
// TODO - don't panic, error instead
panic("column not found in parquet file:" + name)
}
if async {
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
}
var opts []pq.SyncIteratorOpt
@ -369,7 +369,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
opts = append(opts, pq.SyncIteratorOptIntern())
}
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
}
}


@ -70,11 +70,11 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
specialAttrIdxs := map[int]string{}
// standard attributes
resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
// special attributes
for lbl, col := range specialMappings {
idx, _ := pq.GetColumnIndexByPath(pf, col)
idx, _, _ := pq.GetColumnIndexByPath(pf, col)
if idx == -1 {
continue
}
@ -84,7 +84,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
// dedicated attributes
columnMapping.forEach(func(lbl string, c dedicatedColumn) {
idx, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
idx, _, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
if idx == -1 {
return
}


@ -249,7 +249,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
pf := file.parquetFile
idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
r := parquet.NewReader(pf)
return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
}


@ -99,7 +99,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d
// - use rep/def levels to determine if a value exists at a row w/o actually testing values.
// atm i believe this requires reading the pages themselves b/c the rep/def lvls come w/ the page
hasValues := func(path string, pf *parquet.File) bool {
idx, _ := parquetquery.GetColumnIndexByPath(pf, path)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, path)
md := pf.Metadata()
for _, rg := range md.RowGroups {
col := rg.Columns[idx]


@ -3,17 +3,13 @@ package vparquet4
import (
"context"
"fmt"
"path"
"sort"
"testing"
"github.com/google/uuid"
"github.com/grafana/tempo/pkg/collector"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/grafana/tempo/pkg/traceql"
"github.com/grafana/tempo/pkg/util/test"
"github.com/grafana/tempo/tempodb/backend"
"github.com/grafana/tempo/tempodb/backend/local"
"github.com/grafana/tempo/tempodb/encoding/common"
"github.com/stretchr/testify/require"
)
@ -701,21 +697,7 @@ func BenchmarkFetchTagValues(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp/"),
Path: path.Join("/Users/joe/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
for _, tc := range testCases {
@ -782,21 +764,7 @@ func BenchmarkFetchTags(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp/"),
Path: path.Join("/Users/joe/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
for _, tc := range testCases {


@ -146,7 +146,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.Trace, error) {
// traceID column index
colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
if colIndex == -1 {
return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
}
@ -237,7 +237,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
}
// Now iterate the matching row group
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
defer iter.Close()
res, err := iter.Next()


@ -144,52 +144,10 @@ func TestBackendBlockFindTraceByID_TestData(t *testing.T) {
}
}
/*func genIndex(t require.TestingT, block *backendBlock) *index {
pf, _, err := block.openForSearch(context.TODO(), common.DefaultSearchOptions())
require.NoError(t, err)
i := &index{}
for j := range pf.RowGroups() {
iter := parquetquery.NewSyncIterator(context.TODO(), pf.RowGroups()[j:j+1], 0, "", 1000, nil, "TraceID")
defer iter.Close()
for {
v, err := iter.Next()
require.NoError(t, err)
if v == nil {
break
}
i.Add(v.Entries[0].Value.ByteArray())
}
i.Flush()
}
return i
}*/
func BenchmarkFindTraceByID(b *testing.B) {
var (
ctx = context.TODO()
tenantID = "1"
blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
path = "/Users/marty/src/tmp/"
)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(b, err)
rr := backend.NewReader(r)
// ww := backend.NewWriter(w)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
ctx := context.TODO()
traceID := []byte{}
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
// index := genIndex(b, block)
// writeBlockMeta(ctx, ww, meta, &common.ShardedBloomFilter{}, index)


@ -41,7 +41,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
return nil, err
}
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
if traceIDIndex < 0 {
return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
}


@ -357,14 +357,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
index, _ := pq.GetColumnIndexByPath(pf, name)
index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
if index == -1 {
// TODO - don't panic, error instead
panic("column not found in parquet file:" + name)
}
if async {
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
}
var opts []pq.SyncIteratorOpt
@ -372,7 +372,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
opts = append(opts, pq.SyncIteratorOptIntern())
}
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
}
}


@ -71,11 +71,11 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
specialAttrIdxs := map[int]string{}
// standard attributes
resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
// special attributes
for lbl, col := range specialMappings {
idx, _ := pq.GetColumnIndexByPath(pf, col)
idx, _, _ := pq.GetColumnIndexByPath(pf, col)
if idx == -1 {
continue
}
@ -85,7 +85,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
// dedicated attributes
columnMapping.forEach(func(lbl string, c dedicatedColumn) {
idx, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
idx, _, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
if idx == -1 {
return
}


@ -2,14 +2,10 @@ package vparquet4
import (
"context"
"path"
"testing"
"github.com/google/uuid"
"github.com/grafana/tempo/pkg/collector"
"github.com/grafana/tempo/pkg/traceql"
"github.com/grafana/tempo/tempodb/backend"
"github.com/grafana/tempo/tempodb/backend/local"
"github.com/grafana/tempo/tempodb/encoding/common"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -189,19 +185,8 @@ func TestBackendBlockSearchTagValuesV2(t *testing.T) {
func BenchmarkBackendBlockSearchTags(b *testing.B) {
ctx := context.TODO()
tenantID := "1"
blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
block := blockForBenchmarks(b)
r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/marty/src/tmp/"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
opts := common.DefaultSearchOptions()
d := collector.NewDistinctString(1_000_000, 0, 0)
mc := collector.NewMetricsCollector()
@ -221,19 +206,7 @@ func BenchmarkBackendBlockSearchTagValues(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/marty/src/tmp/"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
for _, tc := range testCases {


@ -3,7 +3,6 @@ package vparquet4
import (
"context"
"math/rand"
"path"
"testing"
"time"
@ -412,19 +411,7 @@ func BenchmarkBackendBlockSearchTraces(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/marty/src/tmp/"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
opts.StartPage = 10


@ -6,7 +6,6 @@ import (
"fmt"
"math/rand"
"os"
"path"
"sort"
"strconv"
"strings"
@ -17,6 +16,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/tempo/pkg/parquetquery"
pq "github.com/grafana/tempo/pkg/parquetquery"
"github.com/grafana/tempo/pkg/tempopb"
v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
"github.com/grafana/tempo/pkg/traceql"
@ -957,28 +957,13 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID := uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
blockID := uuid.MustParse("030c8c4f-9d47-4916-aadc-26b90b1d2bc4")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/mapno/workspace/testblock"),
Path: path.Join("/Users/joe/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
opts := common.DefaultSearchOptions()
opts.StartPage = 3
opts.TotalPages = 2
block := newBackendBlock(meta, rr)
_, _, err = block.openForSearch(ctx, opts)
block := blockForBenchmarks(b)
_, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
for _, tc := range testCases {
@ -1016,27 +1001,12 @@ func BenchmarkBackendBlockGetMetrics(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
blockID := uuid.MustParse("257e3a56-224a-4ebe-9696-1b304f456ac2")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp/"),
Path: path.Join("/Users/suraj/wd/grafana/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
require.Equal(b, VersionString, meta.Version)
opts := common.DefaultSearchOptions()
opts.StartPage = 10
opts.TotalPages = 10
block := newBackendBlock(meta, rr)
_, _, err = block.openForSearch(ctx, opts)
block := blockForBenchmarks(b)
_, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
for _, tc := range testCases {
@ -1057,6 +1027,71 @@ func BenchmarkBackendBlockGetMetrics(b *testing.B) {
}
}
// BenchmarkIterators is a convenient method to run benchmarks on various iterator constructions directly when working on optimizations.
// Replace the iterator at the beginning of the benchmark loop with any combination desired.
func BenchmarkIterators(b *testing.B) {
ctx := context.TODO()
opts := common.DefaultSearchOptions()
opts.StartPage = 3
opts.TotalPages = 2
block := blockForBenchmarks(b)
pf, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
rgs := pf.RowGroups()
rgs = rgs[3:5]
var instrPred *parquetquery.InstrumentedPredicate
makeIterInternal := makeIterFunc(ctx, rgs, pf)
makeIter := func(columnName string, predicate pq.Predicate, selectAs string) pq.Iterator {
instrPred = &parquetquery.InstrumentedPredicate{
Pred: predicate,
}
return makeIterInternal(columnName, predicate, selectAs)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := error(nil)
iter := makeIter(columnPathSpanAttrKey, parquetquery.NewSubstringPredicate("e"), "foo")
//parquetquery.NewUnionIterator(DefinitionLevelResourceSpansILSSpanAttrs, []parquetquery.Iterator{
// makeIter(columnPathSpanHTTPStatusCode, parquetquery.NewIntEqualPredicate(500), "http_status"),
// makeIter(columnPathSpanName, parquetquery.NewStringEqualPredicate([]byte("foo")), "name"),
// makeIter(columnPathSpanStatusCode, parquetquery.NewIntEqualPredicate(2), "status"),
// makeIter(columnPathSpanAttrDouble, parquetquery.NewFloatEqualPredicate(500), "double"),
//makeIter(columnPathSpanAttrInt, parquetquery.NewIntEqualPredicate(500), "int"),
//}, nil)
require.NoError(b, err)
// fmt.Println(iter.String())
count := 0
for {
res, err := iter.Next()
if err != nil {
panic(err)
}
if res == nil {
break
}
count++
}
iter.Close()
if instrPred != nil {
b.ReportMetric(float64(count), "count")
b.ReportMetric(float64(instrPred.InspectedColumnChunks), "stats_cc")
b.ReportMetric(float64(instrPred.KeptColumnChunks), "stats_cc_kept")
b.ReportMetric(float64(instrPred.InspectedPages), "stats_ip")
b.ReportMetric(float64(instrPred.KeptPages), "stats_ip_kept")
b.ReportMetric(float64(instrPred.InspectedValues), "stats_v")
b.ReportMetric(float64(instrPred.KeptValues), "stats_v_kept")
}
}
}
func BenchmarkBackendBlockQueryRange(b *testing.B) {
testCases := []string{
"{} | rate()",
@ -1067,32 +1102,13 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
"{status=error} | rate()",
}
var (
ctx = context.TODO()
e = traceql.NewEngine()
tenantID = "1"
// blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
blockID = uuid.MustParse("257e3a56-224a-4ebe-9696-1b304f456ac2")
// path = "/Users/marty/src/tmp/"
// path = "/Users/mapno/workspace/testblock"
path = "/Users/suraj/wd/grafana/testblock"
)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
require.Equal(b, VersionString, meta.Version)
e := traceql.NewEngine()
ctx := context.TODO()
opts := common.DefaultSearchOptions()
opts.TotalPages = 10
block := newBackendBlock(meta, rr)
_, _, err = block.openForSearch(ctx, opts)
block := blockForBenchmarks(b)
_, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
f := traceql.NewSpansetFetcherWrapper(func(ctx context.Context, req traceql.FetchSpansRequest) (traceql.FetchSpansResponse, error) {
@ -1103,10 +1119,10 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
b.Run(tc, func(b *testing.B) {
for _, minutes := range []int{5, 7} {
b.Run(strconv.Itoa(minutes), func(b *testing.B) {
st := meta.StartTime
st := block.meta.StartTime
end := st.Add(time.Duration(minutes) * time.Minute)
if end.After(meta.EndTime) {
if end.After(block.meta.EndTime) {
b.SkipNow()
return
}
@ -1137,92 +1153,6 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
}
}
// TestBackendBlockQueryRange is the `TestOne` of metric queries.
// It's skipped because it depends on a local block, like benchmarks
//
// You also need to manually print the iterator in `backendBlock.Fetch`,
// because there is no access to the iterator in the test. Sad.
func TestBackendBlockQueryRange(t *testing.T) {
if os.Getenv("debug") != "1" {
t.Skip()
}
testCases := []string{
"{} | rate()",
"{} | rate() by (name)",
"{} | rate() by (resource.service.name)",
"{} | rate() by (span.http.url)", // High cardinality attribute
"{resource.service.name=`tempo-ingester`} | rate()",
"{status=unset} | rate()",
}
const (
tenantID = "1"
queryHint = "with(exemplars=true)"
)
var (
ctx = context.TODO()
e = traceql.NewEngine()
opts = common.DefaultSearchOptions()
blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
path = path.Join("/Users/mapno/workspace/testblock")
)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(t, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(t, err)
require.Equal(t, VersionString, meta.Version)
block := newBackendBlock(meta, rr)
opts.TotalPages = 10
_, _, err = block.openForSearch(ctx, opts)
require.NoError(t, err)
f := traceql.NewSpansetFetcherWrapper(func(ctx context.Context, req traceql.FetchSpansRequest) (traceql.FetchSpansResponse, error) {
return block.Fetch(ctx, req, opts)
})
for _, tc := range testCases {
t.Run(tc, func(t *testing.T) {
st := meta.StartTime
end := st.Add(time.Duration(5) * time.Minute)
if end.After(meta.EndTime) {
t.SkipNow()
return
}
req := &tempopb.QueryRangeRequest{
Query: fmt.Sprintf("%s %s", tc, queryHint),
Step: uint64(time.Minute),
Start: uint64(st.UnixNano()),
End: uint64(end.UnixNano()),
}
eval, err := e.CompileMetricsQueryRange(req, 1, 0, false)
require.NoError(t, err)
require.NoError(t, eval.Do(ctx, f, uint64(block.meta.StartTime.UnixNano()), uint64(block.meta.EndTime.UnixNano())))
ss := eval.Results()
require.NotNil(t, ss)
for _, s := range ss {
if s.Exemplars != nil && len(s.Exemplars) > 0 {
fmt.Println("series", s.Labels)
fmt.Println("Exemplars", s.Exemplars)
}
}
})
}
}
func ptr[T any](v T) *T {
return &v
}
@ -2078,3 +2008,33 @@ func randomTree(N int) []traceql.Span {
return nodes
}
func blockForBenchmarks(b *testing.B) *backendBlock {
id, ok := os.LookupEnv("BENCH_BLOCKID")
if !ok {
b.Fatal("BENCH_BLOCKID is not set. These benchmarks are designed to run against a block on local disk. Set BENCH_BLOCKID to the guid of the block to run benchmarks against. e.g. `export BENCH_BLOCKID=030c8c4f-9d47-4916-aadc-26b90b1d2bc4`")
}
path, ok := os.LookupEnv("BENCH_PATH")
if !ok {
b.Fatal("BENCH_PATH is not set. These benchmarks are designed to run against a block on local disk. Set BENCH_PATH to the root of the backend such that the block to benchmark is at <BENCH_PATH>/<BENCH_TENANTID>/<BENCH_BLOCKID>.")
}
tenantID, ok := os.LookupEnv("BENCH_TENANTID")
if !ok {
tenantID = "1"
}
blockID := uuid.MustParse(id)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(context.Background(), blockID, tenantID)
require.NoError(b, err)
return newBackendBlock(meta, rr)
}
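
Per the "add details about bench env vars" commit message above, the benchmarks now locate their block through environment variables instead of hard-coded local paths. A hypothetical invocation, reusing the placeholder ID and path from the error messages above (the package path is an assumption):

export BENCH_BLOCKID=030c8c4f-9d47-4916-aadc-26b90b1d2bc4
export BENCH_PATH=/Users/joe/testblock
export BENCH_TENANTID=1   # optional, defaults to "1"
go test -run=^$ -bench=BenchmarkBackendBlockTraceQL ./tempodb/encoding/vparquet4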


@ -256,7 +256,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
pf := file.parquetFile
idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
r := parquet.NewReader(pf)
return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
}