Do less Nexting (#4753)

* this is garbage

Signed-off-by: Joe Elliott <number101010@gmail.com>

* filtery stuff

Signed-off-by: Joe Elliott <number101010@gmail.com>

* fix

Signed-off-by: Joe Elliott <number101010@gmail.com>

* max def everywhere

Signed-off-by: Joe Elliott <number101010@gmail.com>

* clean up benches

Signed-off-by: Joe Elliott <number101010@gmail.com>

* clean up

Signed-off-by: Joe Elliott <number101010@gmail.com>

* remove vendor changes

Signed-off-by: Joe Elliott <number101010@gmail.com>

* changelog

Signed-off-by: Joe Elliott <number101010@gmail.com>

* add details about bench env vars

Signed-off-by: Joe Elliott <number101010@gmail.com>

---------

Signed-off-by: Joe Elliott <number101010@gmail.com>
Joe Elliott
2025-02-28 08:08:18 -05:00
committed by GitHub
parent eb960ceb57
commit c1f6280dd1
31 changed files with 220 additions and 827 deletions


@ -34,6 +34,7 @@ configurable via the throughput_bytes_slo field, and it will populate op="traces
* [ENHANCEMENT] Improve block-builder performance [#4596](https://github.com/grafana/tempo/pull/4596) (@mdisibio)
* [ENHANCEMENT] Improve block-builder performance by not using WAL stage [#4647](https://github.com/grafana/tempo/pull/4647) [#4671](https://github.com/grafana/tempo/pull/4671) (@mdisibio)
* [ENHANCEMENT] Export new `tempo_ingest_group_partition_lag` metric from block-builders and metrics-generators [#4571](https://github.com/grafana/tempo/pull/4571) (@mdisibio)
* [ENHANCEMENT] Overall iterator performance improvement by using max definition level to ignore parts of the RowNumber while nexting (see the sketch just below this list). [#4753](https://github.com/grafana/tempo/pull/4753) (@joe-elliott)
* [ENHANCEMENT] Use distroless base container images for improved security [#4556](https://github.com/grafana/tempo/pull/4556) (@carles-grafana)
* [ENHANCEMENT] rhythm: add block builder to resources dashboard [#4669](https://github.com/grafana/tempo/pull/4669) (@javiermolinar)
* [ENHANCEMENT] update dskit to latest version [#4681](https://github.com/grafana/tempo/pull/4681) (@javiermolinar)
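
For context on the enhancement above: a RowNumber tracks a value's position at each nesting level, and Next previously reset every level past the definition level on every single value. A minimal sketch of the change, with simplified declarations (the exact Tempo types may differ slightly):

// Sketch only: RowNumber models one value's position per nesting level.
type RowNumber [8]int32

// Next advances the row number for a value with the given repetition and
// definition levels. Levels past the definition level are undefined (-1),
// but a column can never be defined deeper than its own max definition
// level, so the reset loop now stops there instead of walking the array.
func (t *RowNumber) Next(repetitionLevel, definitionLevel, maxDefinitionLevel int) {
	t[repetitionLevel]++

	// New children up through the definition level
	for i := repetitionLevel + 1; i <= definitionLevel; i++ {
		t[i] = 0
	}

	// Levels past the definition level are undefined, capped at the max
	for i := definitionLevel + 1; i < len(t) && i <= maxDefinitionLevel; i++ {
		t[i] = -1
	}
}

For shallow columns (a max definition level of 1 or 2) this skips most of the eight-element array on every value, which is also why the unrolled switch version below could be deleted outright.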


@ -240,10 +240,10 @@ type attribute struct {
}
func aggregateAttributes(pf *parquet.File, keyPath string, valuePaths []string) (genericAttrSummary, error) {
keyIdx, _ := pq.GetColumnIndexByPath(pf, keyPath)
keyIdx, _, _ := pq.GetColumnIndexByPath(pf, keyPath)
valueIdxs := make([]int, 0, len(valuePaths))
for _, v := range valuePaths {
idx, _ := pq.GetColumnIndexByPath(pf, v)
idx, _, _ := pq.GetColumnIndexByPath(pf, v)
valueIdxs = append(valueIdxs, idx)
}
@ -311,7 +311,7 @@ func aggregateDedicatedColumns(pf *parquet.File, scope backend.DedicatedColumnSc
}
func aggregateColumn(pf *parquet.File, colName string) (uint64, error) {
idx, _ := pq.GetColumnIndexByPath(pf, colName)
idx, _, _ := pq.GetColumnIndexByPath(pf, colName)
calc, err := inspect.NewRowStatCalculator(pf, inspect.RowStatOptions{
Columns: []int{idx},
})


@ -38,7 +38,7 @@ func (cmd *listColumnCmd) Run(ctx *globalOptions) error {
return err
}
colIndex, _ := pq.GetColumnIndexByPath(pf, cmd.Column)
colIndex, _, _ := pq.GetColumnIndexByPath(pf, cmd.Column)
for i, rg := range pf.RowGroups() {


@ -126,7 +126,7 @@ func getAllTraceIDs(t *testing.T, dir string, tenant string) []string {
err := r.Close()
require.NoError(t, err)
}()
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, vparquet4.TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, vparquet4.TraceIDColumnName)
require.GreaterOrEqual(t, traceIDIndex, 0)
defer func() {
err := r.Close()


@ -128,429 +128,7 @@ func (t *RowNumber) Valid() bool {
// null | 1 | 1 | { 0, 1, -1, -1 }
// gb | 1 | 3 | { 0, 2, 0, 0 }
// null | 0 | 1 | { 1, 0, -1, -1 }
func (t *RowNumber) Next(repetitionLevel, definitionLevel int) {
t[repetitionLevel]++
// the following is nextSlow() unrolled
switch repetitionLevel {
case 0:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[1] = 0
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[1] = 0
t[2] = 0
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[1] = 0
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 1:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[2] = 0
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[2] = 0
t[3] = 0
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[2] = 0
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 2:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[3] = 0
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[3] = 0
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[3] = 0
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 3:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[4] = 0
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[4] = 0
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[4] = 0
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 4:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[5] = 0
t[6] = -1
t[7] = -1
case 6:
t[5] = 0
t[6] = 0
t[7] = -1
case 7:
t[5] = 0
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 5:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[6] = -1
t[7] = -1
case 6:
t[6] = 0
t[7] = -1
case 7:
t[6] = 0
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 6:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[6] = -1
t[7] = -1
case 6:
t[7] = -1
case 7:
t[7] = 0
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
case 7:
switch definitionLevel {
case 0:
t[1] = -1
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 1:
t[2] = -1
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 2:
t[3] = -1
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 3:
t[4] = -1
t[5] = -1
t[6] = -1
t[7] = -1
case 4:
t[5] = -1
t[6] = -1
t[7] = -1
case 5:
t[6] = -1
t[7] = -1
case 6:
t[7] = -1
case 7:
default:
panicWhenInvalidDefinitionLevel(definitionLevel)
}
}
}
// nextSlow is the original implementation of next. it is kept to test against
// the unrolled version above
func (t *RowNumber) nextSlow(repetitionLevel, definitionLevel int) {
func (t *RowNumber) Next(repetitionLevel, definitionLevel, maxDefinitionLevel int) {
t[repetitionLevel]++
// New children up through the definition level
@ -559,7 +137,7 @@ func (t *RowNumber) nextSlow(repetitionLevel, definitionLevel int) {
}
// Children past the definition level are undefined
for i := definitionLevel + 1; i < len(t); i++ {
for i := definitionLevel + 1; i < len(t) && i <= maxDefinitionLevel; i++ {
t[i] = -1
}
}
@ -803,13 +381,15 @@ type SyncIterator struct {
currPageN int
at IteratorResult // Current value pointed at by iterator. Returned by call Next and SeekTo, valid until next call.
maxDefinitionLevel int
intern bool
interner *intern.Interner
}
var _ Iterator = (*SyncIterator)(nil)
func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, opts ...SyncIteratorOpt) *SyncIterator {
func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, maxDefinitionLevel int, opts ...SyncIteratorOpt) *SyncIterator {
// Assign row group bounds.
// Lower bound is inclusive
// Upper bound is exclusive, points at the first row of the next group
@ -841,16 +421,17 @@ func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnN
// Create the iterator
i := &SyncIterator{
span: span,
column: column,
columnName: columnName,
rgs: rgs,
readSize: readSize,
rgsMin: rgsMin,
rgsMax: rgsMax,
filter: filter,
curr: EmptyRowNumber(),
at: at,
span: span,
column: column,
columnName: columnName,
rgs: rgs,
readSize: readSize,
rgsMin: rgsMin,
rgsMax: rgsMax,
filter: filter,
curr: EmptyRowNumber(),
at: at,
maxDefinitionLevel: maxDefinitionLevel,
}
// Apply options
@ -1160,7 +741,7 @@ func (c *SyncIterator) next() (RowNumber, *pq.Value, error) {
// Inspect all values to track the current row number,
// even if the value is filtered out next.
c.curr.Next(v.RepetitionLevel(), v.DefinitionLevel())
c.curr.Next(v.RepetitionLevel(), v.DefinitionLevel(), c.maxDefinitionLevel)
c.currBufN++
c.currPageN++
@ -1260,11 +841,12 @@ func (c *SyncIterator) Close() {
// the optional predicate to each chunk, page, and value. Results are read by calling
// Next() until it returns nil.
type ColumnIterator struct {
rgs []pq.RowGroup
col int
colName string
filter *InstrumentedPredicate
selectAs string
rgs []pq.RowGroup
col int
colName string
filter *InstrumentedPredicate
selectAs string
maxDefinitionLevel int
// Row number to seek to, protected by mutex.
// Less allocs than storing in atomic.Value
@ -1288,16 +870,17 @@ type columnIteratorBuffer struct {
values []pq.Value
}
func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string) *ColumnIterator {
func NewColumnIterator(ctx context.Context, rgs []pq.RowGroup, column int, columnName string, readSize int, filter Predicate, selectAs string, maxDefinitionLevel int) *ColumnIterator {
c := &ColumnIterator{
rgs: rgs,
col: column,
colName: columnName,
filter: &InstrumentedPredicate{pred: filter},
selectAs: selectAs,
quit: make(chan struct{}),
ch: make(chan *columnIteratorBuffer, 1),
currN: -1,
rgs: rgs,
col: column,
colName: columnName,
filter: &InstrumentedPredicate{Pred: filter},
selectAs: selectAs,
quit: make(chan struct{}),
ch: make(chan *columnIteratorBuffer, 1),
currN: -1,
maxDefinitionLevel: maxDefinitionLevel,
}
c.iter = func() { c.iterate(ctx, readSize) }
@ -1417,7 +1000,7 @@ func (c *ColumnIterator) iterate(ctx context.Context, readSize int) {
// We have to do this for all values (even if the
// value is excluded by the predicate)
rn.Next(v.RepetitionLevel(), v.DefinitionLevel())
rn.Next(v.RepetitionLevel(), v.DefinitionLevel(), c.maxDefinitionLevel)
if c.filter != nil {
if !c.filter.KeepValue(v) {
@ -2015,7 +1598,6 @@ func (u *UnionIterator) Next() (*IteratorResult, error) {
if err != nil {
return nil, fmt.Errorf("union iterator peek failed: %w", err)
}
// If this iterator is exhausted go to the next one
if rn == nil {
continue


@ -3,7 +3,6 @@ package parquetquery
import (
"context"
"math"
"math/rand"
"os"
"strconv"
"testing"
@ -19,30 +18,13 @@ var iterTestCases = []struct {
makeIter makeTestIterFn
}{
{"async", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator {
return NewColumnIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs)
return NewColumnIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs, MaxDefinitionLevel)
}},
{"sync", func(pf *parquet.File, idx int, filter Predicate, selectAs string) Iterator {
return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs)
return NewSyncIterator(context.TODO(), pf.RowGroups(), idx, selectAs, 1000, filter, selectAs, MaxDefinitionLevel)
}},
}
// TestNext compares the unrolled Next() with the original nextSlow() to
// prevent drift
func TestNext(t *testing.T) {
rn1 := RowNumber{0, 0, 0, 0, 0, 0, 0, 0}
rn2 := RowNumber{0, 0, 0, 0, 0, 0, 0, 0}
for i := 0; i < 1000; i++ {
r := rand.Intn(MaxDefinitionLevel + 1)
d := rand.Intn(MaxDefinitionLevel + 1)
rn1.Next(r, d)
rn2.nextSlow(r, d)
require.Equal(t, rn1, rn2)
}
}
// TestTruncate compares the unrolled TruncateRowNumber() with the original truncateRowNumberSlow() to
// prevent drift
func TestTruncateRowNumber(t *testing.T) {
@ -73,44 +55,26 @@ func TestInvalidDefinitionLevelTruncate(t *testing.T) {
})
}
func TestInvalidDefinitionLevelNext(t *testing.T) {
t.Run("Next -1", func(t *testing.T) {
assertPanic(t, func() {
rn := RowNumber{1, 2, 3, 4, 5, 6, 7, 8}
r := 0
d := -1
rn.Next(r, d)
})
})
t.Run("Next Max+1", func(t *testing.T) {
assertPanic(t, func() {
rn := RowNumber{1, 2, 3, 4, 5, 6, 7, 8}
r := 0
d := MaxDefinitionLevel + 1
rn.Next(r, d)
})
})
}
func TestRowNumber(t *testing.T) {
func TestRowNumberNext(t *testing.T) {
tr := EmptyRowNumber()
require.Equal(t, RowNumber{-1, -1, -1, -1, -1, -1, -1, -1}, tr)
steps := []struct {
repetitionLevel int
definitionLevel int
expected RowNumber
repetitionLevel int
definitionLevel int
maxDefinitionLevel int
expected RowNumber
}{
// Name.Language.Country examples from the Dremel whitepaper
{0, 3, RowNumber{0, 0, 0, 0, -1, -1, -1, -1}},
{2, 2, RowNumber{0, 0, 1, -1, -1, -1, -1, -1}},
{1, 1, RowNumber{0, 1, -1, -1, -1, -1, -1, -1}},
{1, 3, RowNumber{0, 2, 0, 0, -1, -1, -1, -1}},
{0, 1, RowNumber{1, 0, -1, -1, -1, -1, -1, -1}},
{0, 3, 3, RowNumber{0, 0, 0, 0, -1, -1, -1, -1}},
{2, 2, 3, RowNumber{0, 0, 1, -1, -1, -1, -1, -1}},
{1, 1, 3, RowNumber{0, 1, -1, -1, -1, -1, -1, -1}},
{1, 3, 3, RowNumber{0, 2, 0, 0, -1, -1, -1, -1}},
{0, 1, 3, RowNumber{1, 0, -1, -1, -1, -1, -1, -1}},
}
for _, step := range steps {
tr.Next(step.repetitionLevel, step.definitionLevel)
tr.Next(step.repetitionLevel, step.definitionLevel, step.maxDefinitionLevel)
require.Equal(t, step.expected, tr)
}
}
@ -158,7 +122,7 @@ func testColumnIterator(t *testing.T, makeIter makeTestIterFn) {
count := 100_000
pf := createTestFile(t, count)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
iter := makeIter(pf, idx, nil, "A")
defer iter.Close()
@ -187,7 +151,7 @@ func testColumnIteratorSeek(t *testing.T, makeIter makeTestIterFn) {
count := 10_000
pf := createTestFile(t, count)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
iter := makeIter(pf, idx, nil, "A")
defer iter.Close()
@ -224,7 +188,7 @@ func testColumnIteratorPredicate(t *testing.T, makeIter makeTestIterFn) {
pred := NewIntBetweenPredicate(7001, 7003)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
iter := makeIter(pf, idx, pred, "A")
defer iter.Close()
@ -253,7 +217,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
}
pf := createFileWith(t, rows)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
readSize := 1000
readIter := func(iter Iterator) (int, error) {
@ -275,7 +239,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
// Cancel before iterating
ctx, cancel := context.WithCancel(context.TODO())
cancel()
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
count, err := readIter(iter)
require.ErrorContains(t, err, "context canceled")
require.Equal(t, 0, count)
@ -283,7 +247,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
t.Run("cancelledPartial", func(t *testing.T) {
ctx, cancel := context.WithCancel(context.TODO())
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(ctx, pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
// Read some results
_, err := iter.Next()
@ -299,7 +263,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
t.Run("closedEarly", func(t *testing.T) {
// Close before iterating
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
iter.Close()
count, err := readIter(iter)
require.NoError(t, err)
@ -307,7 +271,7 @@ func TestColumnIteratorExitEarly(t *testing.T) {
})
t.Run("closedPartial", func(t *testing.T) {
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A")
iter := NewColumnIterator(context.TODO(), pf.RowGroups(), idx, "", readSize, nil, "A", MaxDefinitionLevel)
// Read some results
_, err := iter.Next()
@ -335,7 +299,7 @@ func benchmarkColumnIterator(b *testing.B, makeIter makeTestIterFn) {
count := 100_000
pf := createTestFile(b, count)
idx, _ := GetColumnIndexByPath(pf, "A")
idx, _, _ := GetColumnIndexByPath(pf, "A")
b.ResetTimer()


@ -237,9 +237,9 @@ func testPredicate(t *testing.T, tc predicateTestCase) {
r, err := parquet.OpenFile(file, int64(buf.Len()))
require.NoError(t, err)
p := InstrumentedPredicate{pred: tc.predicate}
p := InstrumentedPredicate{Pred: tc.predicate}
i := NewColumnIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "")
i := NewColumnIterator(context.TODO(), r.RowGroups(), 0, "test", 100, &p, "", MaxDefinitionLevel)
for {
res, err := i.Next()
require.NoError(t, err)


@ -372,7 +372,7 @@ func (p *OrPredicate) KeepValue(v pq.Value) bool {
}
type InstrumentedPredicate struct {
pred Predicate // Optional, if missing then just keeps metrics with no filtering
Pred Predicate // Optional, if missing then just keeps metrics with no filtering
InspectedColumnChunks int64
InspectedPages int64
InspectedValues int64
@ -384,16 +384,16 @@ type InstrumentedPredicate struct {
var _ Predicate = (*InstrumentedPredicate)(nil)
func (p *InstrumentedPredicate) String() string {
if p.pred == nil {
if p.Pred == nil {
return fmt.Sprintf("InstrumentedPredicate{%d, nil}", p.InspectedValues)
}
return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.pred)
return fmt.Sprintf("InstrumentedPredicate{%d, %s}", p.InspectedValues, p.Pred)
}
func (p *InstrumentedPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool {
p.InspectedColumnChunks++
if p.pred == nil || p.pred.KeepColumnChunk(c) {
if p.Pred == nil || p.Pred.KeepColumnChunk(c) {
p.KeptColumnChunks++
return true
}
@ -404,7 +404,7 @@ func (p *InstrumentedPredicate) KeepColumnChunk(c *ColumnChunkHelper) bool {
func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
p.InspectedPages++
if p.pred == nil || p.pred.KeepPage(page) {
if p.Pred == nil || p.Pred.KeepPage(page) {
p.KeptPages++
return true
}
@ -415,7 +415,7 @@ func (p *InstrumentedPredicate) KeepPage(page pq.Page) bool {
func (p *InstrumentedPredicate) KeepValue(v pq.Value) bool {
p.InspectedValues++
if p.pred == nil || p.pred.KeepValue(v) {
if p.Pred == nil || p.Pred.KeepValue(v) {
p.KeptValues++
return true
}


@ -6,23 +6,23 @@ import (
pq "github.com/parquet-go/parquet-go"
)
func GetColumnIndexByPath(pf *pq.File, s string) (index, depth int) {
func GetColumnIndexByPath(pf *pq.File, s string) (index, depth, maxDef int) {
colSelector := strings.Split(s, ".")
n := pf.Root()
for len(colSelector) > 0 {
n = n.Column(colSelector[0])
if n == nil {
return -1, -1
return -1, -1, -1
}
colSelector = colSelector[1:]
depth++
}
return n.Index(), depth
return n.Index(), depth, n.MaxDefinitionLevel()
}
func HasColumn(pf *pq.File, s string) bool {
index, _ := GetColumnIndexByPath(pf, s)
index, _, _ := GetColumnIndexByPath(pf, s)
return index >= 0
}
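
Since GetColumnIndexByPath now returns the column's max definition level as a third value, every call site threads it into the iterator constructors. A condensed sketch of the pattern repeated throughout this commit (traceIDIterator is a hypothetical helper; pq aliases github.com/grafana/tempo/pkg/parquetquery as in the vparquet packages):

// Hypothetical helper showing the new three-value call pattern.
func traceIDIterator(ctx context.Context, pf *parquet.File) (pq.Iterator, error) {
	colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
	if colIndex == -1 {
		return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
	}
	// The max definition level rides along with the column index.
	return pq.NewColumnIterator(ctx, pf.RowGroups(), colIndex, "", 1000, nil, "", maxDef), nil
}

Callers that previously discarded one extra return (idx, _ :=) now discard two (idx, _, _ :=) when they don't need the level.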


@ -139,7 +139,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.TraceByIDResponse, error) {
// traceID column index
colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
if colIndex == -1 {
return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
}
@ -227,7 +227,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
}
// Now iterate the matching row group
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
defer iter.Close()
res, err := iter.Next()


@ -34,7 +34,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
return nil, err
}
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
if traceIDIndex < 0 {
return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
}


@ -356,14 +356,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
index, _ := pq.GetColumnIndexByPath(pf, name)
index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
if index == -1 {
// TODO - don't panic, error instead
panic("column not found in parquet file:" + name)
}
if async {
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
}
var opts []pq.SyncIteratorOpt
@ -371,7 +371,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
opts = append(opts, pq.SyncIteratorOptIntern())
}
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
}
}


@ -69,14 +69,14 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
specialAttrIdxs := map[int]string{}
// standard resource attributes
resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
if resourceKeyIdx == -1 {
return fmt.Errorf("resource attributes col not found (%d)", resourceKeyIdx)
}
// special resource attributes
for lbl, col := range specialMappings {
idx, _ := pq.GetColumnIndexByPath(pf, col)
idx, _, _ := pq.GetColumnIndexByPath(pf, col)
if idx == -1 {
continue
}


@ -244,7 +244,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
pf := file.parquetFile
idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
r := parquet.NewReader(pf)
return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
}


@ -99,7 +99,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d
// - use rep/def levels to determine if a value exists at a row w/o actually testing values.
// atm i believe this requires reading the pages themselves b/c the rep/def lvls come w/ the page
hasValues := func(path string, pf *parquet.File) bool {
idx, _ := parquetquery.GetColumnIndexByPath(pf, path)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, path)
md := pf.Metadata()
for _, rg := range md.RowGroups {
col := rg.Columns[idx]


@ -139,7 +139,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.TraceByIDResponse, error) {
// traceID column index
colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
if colIndex == -1 {
return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
}
@ -230,7 +230,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
}
// Now iterate the matching row group
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
defer iter.Close()
res, err := iter.Next()


@ -41,7 +41,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
return nil, err
}
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
if traceIDIndex < 0 {
return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
}


@ -354,14 +354,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
index, _ := pq.GetColumnIndexByPath(pf, name)
index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
if index == -1 {
// TODO - don't panic, error instead
panic("column not found in parquet file:" + name)
}
if async {
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
}
var opts []pq.SyncIteratorOpt
@ -369,7 +369,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
opts = append(opts, pq.SyncIteratorOptIntern())
}
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
}
}


@ -70,11 +70,11 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
specialAttrIdxs := map[int]string{}
// standard attributes
resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
// special attributes
for lbl, col := range specialMappings {
idx, _ := pq.GetColumnIndexByPath(pf, col)
idx, _, _ := pq.GetColumnIndexByPath(pf, col)
if idx == -1 {
continue
}
@ -84,7 +84,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
// dedicated attributes
columnMapping.forEach(func(lbl string, c dedicatedColumn) {
idx, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
idx, _, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
if idx == -1 {
return
}


@ -249,7 +249,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
pf := file.parquetFile
idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
r := parquet.NewReader(pf)
return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
}


@ -99,7 +99,7 @@ func tagNamesForSpecialColumns(scope traceql.AttributeScope, pf *parquet.File, d
// - use rep/def levels to determine if a value exists at a row w/o actually testing values.
// atm i believe this requires reading the pages themselves b/c the rep/def lvls come w/ the page
hasValues := func(path string, pf *parquet.File) bool {
idx, _ := parquetquery.GetColumnIndexByPath(pf, path)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, path)
md := pf.Metadata()
for _, rg := range md.RowGroups {
col := rg.Columns[idx]


@ -3,17 +3,13 @@ package vparquet4
import (
"context"
"fmt"
"path"
"sort"
"testing"
"github.com/google/uuid"
"github.com/grafana/tempo/pkg/collector"
"github.com/grafana/tempo/pkg/tempopb"
"github.com/grafana/tempo/pkg/traceql"
"github.com/grafana/tempo/pkg/util/test"
"github.com/grafana/tempo/tempodb/backend"
"github.com/grafana/tempo/tempodb/backend/local"
"github.com/grafana/tempo/tempodb/encoding/common"
"github.com/stretchr/testify/require"
)
@ -701,21 +697,7 @@ func BenchmarkFetchTagValues(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp/"),
Path: path.Join("/Users/joe/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
for _, tc := range testCases {
@ -782,21 +764,7 @@ func BenchmarkFetchTags(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
blockID := uuid.MustParse("00145f38-6058-4e57-b1ba-334db8edce23")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp/"),
Path: path.Join("/Users/joe/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
for _, tc := range testCases {


@ -146,7 +146,7 @@ func (b *backendBlock) FindTraceByID(ctx context.Context, traceID common.ID, opt
func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMeta, pf *parquet.File, rowGroup int) (*tempopb.Trace, error) {
// traceID column index
colIndex, _ := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
colIndex, _, maxDef := pq.GetColumnIndexByPath(pf, TraceIDColumnName)
if colIndex == -1 {
return nil, fmt.Errorf("unable to get index for column: %s", TraceIDColumnName)
}
@ -237,7 +237,7 @@ func findTraceByID(ctx context.Context, traceID common.ID, meta *backend.BlockMe
}
// Now iterate the matching row group
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "")
iter := parquetquery.NewColumnIterator(ctx, pf.RowGroups()[rowGroup:rowGroup+1], colIndex, "", 1000, parquetquery.NewStringInPredicate([]string{string(traceID)}), "", maxDef)
defer iter.Close()
res, err := iter.Next()


@ -144,52 +144,10 @@ func TestBackendBlockFindTraceByID_TestData(t *testing.T) {
}
}
/*func genIndex(t require.TestingT, block *backendBlock) *index {
pf, _, err := block.openForSearch(context.TODO(), common.DefaultSearchOptions())
require.NoError(t, err)
i := &index{}
for j := range pf.RowGroups() {
iter := parquetquery.NewSyncIterator(context.TODO(), pf.RowGroups()[j:j+1], 0, "", 1000, nil, "TraceID")
defer iter.Close()
for {
v, err := iter.Next()
require.NoError(t, err)
if v == nil {
break
}
i.Add(v.Entries[0].Value.ByteArray())
}
i.Flush()
}
return i
}*/
func BenchmarkFindTraceByID(b *testing.B) {
var (
ctx = context.TODO()
tenantID = "1"
blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
path = "/Users/marty/src/tmp/"
)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(b, err)
rr := backend.NewReader(r)
// ww := backend.NewWriter(w)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
ctx := context.TODO()
traceID := []byte{}
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
// index := genIndex(b, block)
// writeBlockMeta(ctx, ww, meta, &common.ShardedBloomFilter{}, index)


@ -41,7 +41,7 @@ func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator
return nil, err
}
traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
if traceIDIndex < 0 {
return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
}


@ -357,14 +357,14 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
async := os.Getenv(EnvVarAsyncIteratorName) == EnvVarAsyncIteratorValue
return func(name string, predicate pq.Predicate, selectAs string) pq.Iterator {
index, _ := pq.GetColumnIndexByPath(pf, name)
index, _, maxDef := pq.GetColumnIndexByPath(pf, name)
if index == -1 {
// TODO - don't panic, error instead
panic("column not found in parquet file:" + name)
}
if async {
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs)
return pq.NewColumnIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef)
}
var opts []pq.SyncIteratorOpt
@ -372,7 +372,7 @@ func makeIterFunc(ctx context.Context, rgs []parquet.RowGroup, pf *parquet.File)
opts = append(opts, pq.SyncIteratorOptIntern())
}
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, opts...)
return pq.NewSyncIterator(ctx, rgs, index, name, 1000, predicate, selectAs, maxDef, opts...)
}
}


@ -71,11 +71,11 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
specialAttrIdxs := map[int]string{}
// standard attributes
resourceKeyIdx, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
resourceKeyIdx, _, _ := pq.GetColumnIndexByPath(pf, standardKeyPath)
// special attributes
for lbl, col := range specialMappings {
idx, _ := pq.GetColumnIndexByPath(pf, col)
idx, _, _ := pq.GetColumnIndexByPath(pf, col)
if idx == -1 {
continue
}
@ -85,7 +85,7 @@ func searchTags(_ context.Context, scope traceql.AttributeScope, cb common.TagsC
// dedicated attributes
columnMapping.forEach(func(lbl string, c dedicatedColumn) {
idx, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
idx, _, _ := pq.GetColumnIndexByPath(pf, c.ColumnPath)
if idx == -1 {
return
}


@ -2,14 +2,10 @@ package vparquet4
import (
"context"
"path"
"testing"
"github.com/google/uuid"
"github.com/grafana/tempo/pkg/collector"
"github.com/grafana/tempo/pkg/traceql"
"github.com/grafana/tempo/tempodb/backend"
"github.com/grafana/tempo/tempodb/backend/local"
"github.com/grafana/tempo/tempodb/encoding/common"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@ -189,19 +185,8 @@ func TestBackendBlockSearchTagValuesV2(t *testing.T) {
func BenchmarkBackendBlockSearchTags(b *testing.B) {
ctx := context.TODO()
tenantID := "1"
blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
block := blockForBenchmarks(b)
r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/marty/src/tmp/"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
opts := common.DefaultSearchOptions()
d := collector.NewDistinctString(1_000_000, 0, 0)
mc := collector.NewMetricsCollector()
@ -221,19 +206,7 @@ func BenchmarkBackendBlockSearchTagValues(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/marty/src/tmp/"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
for _, tc := range testCases {


@ -3,7 +3,6 @@ package vparquet4
import (
"context"
"math/rand"
"path"
"testing"
"time"
@ -412,19 +411,7 @@ func BenchmarkBackendBlockSearchTraces(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
blockID := uuid.MustParse("3685ee3d-cbbf-4f36-bf28-93447a19dea6")
r, _, _, err := local.New(&local.Config{
Path: path.Join("/Users/marty/src/tmp/"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
block := newBackendBlock(meta, rr)
block := blockForBenchmarks(b)
opts := common.DefaultSearchOptions()
opts.StartPage = 10


@ -6,7 +6,6 @@ import (
"fmt"
"math/rand"
"os"
"path"
"sort"
"strconv"
"strings"
@ -17,6 +16,7 @@ import (
"github.com/stretchr/testify/require"
"github.com/grafana/tempo/pkg/parquetquery"
pq "github.com/grafana/tempo/pkg/parquetquery"
"github.com/grafana/tempo/pkg/tempopb"
v1 "github.com/grafana/tempo/pkg/tempopb/trace/v1"
"github.com/grafana/tempo/pkg/traceql"
@ -957,28 +957,13 @@ func BenchmarkBackendBlockTraceQL(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID := uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
blockID := uuid.MustParse("030c8c4f-9d47-4916-aadc-26b90b1d2bc4")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp"),
// Path: path.Join("/Users/mapno/workspace/testblock"),
Path: path.Join("/Users/joe/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
opts := common.DefaultSearchOptions()
opts.StartPage = 3
opts.TotalPages = 2
block := newBackendBlock(meta, rr)
_, _, err = block.openForSearch(ctx, opts)
block := blockForBenchmarks(b)
_, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
for _, tc := range testCases {
@ -1016,27 +1001,12 @@ func BenchmarkBackendBlockGetMetrics(b *testing.B) {
}
ctx := context.TODO()
tenantID := "1"
// blockID := uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
blockID := uuid.MustParse("257e3a56-224a-4ebe-9696-1b304f456ac2")
r, _, _, err := local.New(&local.Config{
// Path: path.Join("/Users/marty/src/tmp/"),
Path: path.Join("/Users/suraj/wd/grafana/testblock"),
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
require.Equal(b, VersionString, meta.Version)
opts := common.DefaultSearchOptions()
opts.StartPage = 10
opts.TotalPages = 10
block := newBackendBlock(meta, rr)
_, _, err = block.openForSearch(ctx, opts)
block := blockForBenchmarks(b)
_, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
for _, tc := range testCases {
@ -1057,6 +1027,71 @@ func BenchmarkBackendBlockGetMetrics(b *testing.B) {
}
}
// BenchmarkIterators is a convenient method to run benchmarks on various iterator constructions directly when working on optimizations.
// Replace the iterator at the beginning of the benchmark loop with any combination desired.
func BenchmarkIterators(b *testing.B) {
ctx := context.TODO()
opts := common.DefaultSearchOptions()
opts.StartPage = 3
opts.TotalPages = 2
block := blockForBenchmarks(b)
pf, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
rgs := pf.RowGroups()
rgs = rgs[3:5]
var instrPred *parquetquery.InstrumentedPredicate
makeIterInternal := makeIterFunc(ctx, rgs, pf)
makeIter := func(columnName string, predicate pq.Predicate, selectAs string) pq.Iterator {
instrPred = &parquetquery.InstrumentedPredicate{
Pred: predicate,
}
return makeIterInternal(columnName, predicate, selectAs)
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
err := error(nil)
iter := makeIter(columnPathSpanAttrKey, parquetquery.NewSubstringPredicate("e"), "foo")
//parquetquery.NewUnionIterator(DefinitionLevelResourceSpansILSSpanAttrs, []parquetquery.Iterator{
// makeIter(columnPathSpanHTTPStatusCode, parquetquery.NewIntEqualPredicate(500), "http_status"),
// makeIter(columnPathSpanName, parquetquery.NewStringEqualPredicate([]byte("foo")), "name"),
// makeIter(columnPathSpanStatusCode, parquetquery.NewIntEqualPredicate(2), "status"),
// makeIter(columnPathSpanAttrDouble, parquetquery.NewFloatEqualPredicate(500), "double"),
//makeIter(columnPathSpanAttrInt, parquetquery.NewIntEqualPredicate(500), "int"),
//}, nil)
require.NoError(b, err)
// fmt.Println(iter.String())
count := 0
for {
res, err := iter.Next()
if err != nil {
panic(err)
}
if res == nil {
break
}
count++
}
iter.Close()
if instrPred != nil {
b.ReportMetric(float64(count), "count")
b.ReportMetric(float64(instrPred.InspectedColumnChunks), "stats_cc")
b.ReportMetric(float64(instrPred.KeptColumnChunks), "stats_cc_kept")
b.ReportMetric(float64(instrPred.InspectedPages), "stats_ip")
b.ReportMetric(float64(instrPred.KeptPages), "stats_ip_kept")
b.ReportMetric(float64(instrPred.InspectedValues), "stats_v")
b.ReportMetric(float64(instrPred.KeptValues), "stats_v_kept")
}
}
}
func BenchmarkBackendBlockQueryRange(b *testing.B) {
testCases := []string{
"{} | rate()",
@ -1067,32 +1102,13 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
"{status=error} | rate()",
}
var (
ctx = context.TODO()
e = traceql.NewEngine()
tenantID = "1"
// blockID = uuid.MustParse("06ebd383-8d4e-4289-b0e9-cf2197d611d5")
// blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
blockID = uuid.MustParse("257e3a56-224a-4ebe-9696-1b304f456ac2")
// path = "/Users/marty/src/tmp/"
// path = "/Users/mapno/workspace/testblock"
path = "/Users/suraj/wd/grafana/testblock"
)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(b, err)
require.Equal(b, VersionString, meta.Version)
e := traceql.NewEngine()
ctx := context.TODO()
opts := common.DefaultSearchOptions()
opts.TotalPages = 10
block := newBackendBlock(meta, rr)
_, _, err = block.openForSearch(ctx, opts)
block := blockForBenchmarks(b)
_, _, err := block.openForSearch(ctx, opts)
require.NoError(b, err)
f := traceql.NewSpansetFetcherWrapper(func(ctx context.Context, req traceql.FetchSpansRequest) (traceql.FetchSpansResponse, error) {
@ -1103,10 +1119,10 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
b.Run(tc, func(b *testing.B) {
for _, minutes := range []int{5, 7} {
b.Run(strconv.Itoa(minutes), func(b *testing.B) {
st := meta.StartTime
st := block.meta.StartTime
end := st.Add(time.Duration(minutes) * time.Minute)
if end.After(meta.EndTime) {
if end.After(block.meta.EndTime) {
b.SkipNow()
return
}
@ -1137,92 +1153,6 @@ func BenchmarkBackendBlockQueryRange(b *testing.B) {
}
}
// TestBackendBlockQueryRange is the `TestOne` of metric queries.
// It's skipped because it depends on a local block, like benchmarks
//
// You also need to manually print the iterator in `backendBlock.Fetch`,
// because there is no access to the iterator in the test. Sad.
func TestBackendBlockQueryRange(t *testing.T) {
if os.Getenv("debug") != "1" {
t.Skip()
}
testCases := []string{
"{} | rate()",
"{} | rate() by (name)",
"{} | rate() by (resource.service.name)",
"{} | rate() by (span.http.url)", // High cardinality attribute
"{resource.service.name=`tempo-ingester`} | rate()",
"{status=unset} | rate()",
}
const (
tenantID = "1"
queryHint = "with(exemplars=true)"
)
var (
ctx = context.TODO()
e = traceql.NewEngine()
opts = common.DefaultSearchOptions()
blockID = uuid.MustParse("0008e57d-069d-4510-a001-b9433b2da08c")
path = path.Join("/Users/mapno/workspace/testblock")
)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(t, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(ctx, blockID, tenantID)
require.NoError(t, err)
require.Equal(t, VersionString, meta.Version)
block := newBackendBlock(meta, rr)
opts.TotalPages = 10
_, _, err = block.openForSearch(ctx, opts)
require.NoError(t, err)
f := traceql.NewSpansetFetcherWrapper(func(ctx context.Context, req traceql.FetchSpansRequest) (traceql.FetchSpansResponse, error) {
return block.Fetch(ctx, req, opts)
})
for _, tc := range testCases {
t.Run(tc, func(t *testing.T) {
st := meta.StartTime
end := st.Add(time.Duration(5) * time.Minute)
if end.After(meta.EndTime) {
t.SkipNow()
return
}
req := &tempopb.QueryRangeRequest{
Query: fmt.Sprintf("%s %s", tc, queryHint),
Step: uint64(time.Minute),
Start: uint64(st.UnixNano()),
End: uint64(end.UnixNano()),
}
eval, err := e.CompileMetricsQueryRange(req, 1, 0, false)
require.NoError(t, err)
require.NoError(t, eval.Do(ctx, f, uint64(block.meta.StartTime.UnixNano()), uint64(block.meta.EndTime.UnixNano())))
ss := eval.Results()
require.NotNil(t, ss)
for _, s := range ss {
if s.Exemplars != nil && len(s.Exemplars) > 0 {
fmt.Println("series", s.Labels)
fmt.Println("Exemplars", s.Exemplars)
}
}
})
}
}
func ptr[T any](v T) *T {
return &v
}
@ -2078,3 +2008,33 @@ func randomTree(N int) []traceql.Span {
return nodes
}
func blockForBenchmarks(b *testing.B) *backendBlock {
id, ok := os.LookupEnv("BENCH_BLOCKID")
if !ok {
b.Fatal("BENCH_BLOCKID is not set. These benchmarks are designed to run against a block on local disk. Set BENCH_BLOCKID to the guid of the block to run benchmarks against. e.g. `export BENCH_BLOCKID=030c8c4f-9d47-4916-aadc-26b90b1d2bc4`")
}
path, ok := os.LookupEnv("BENCH_PATH")
if !ok {
b.Fatal("BENCH_PATH is not set. These benchmarks are designed to run against a block on local disk. Set BENCH_PATH to the root of the backend such that the block to benchmark is at <BENCH_PATH>/<BENCH_TENANTID>/<BENCH_BLOCKID>.")
}
tenantID, ok := os.LookupEnv("BENCH_TENANTID")
if !ok {
tenantID = "1"
}
blockID := uuid.MustParse(id)
r, _, _, err := local.New(&local.Config{
Path: path,
})
require.NoError(b, err)
rr := backend.NewReader(r)
meta, err := rr.BlockMeta(context.Background(), blockID, tenantID)
require.NoError(b, err)
return newBackendBlock(meta, rr)
}
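
Per the "add details about bench env vars" commit message above, the benchmarks now locate their block through environment variables instead of hard-coded local paths. A hypothetical invocation, reusing the placeholder ID and path from the error messages above (the package path is an assumption):

export BENCH_BLOCKID=030c8c4f-9d47-4916-aadc-26b90b1d2bc4
export BENCH_PATH=/Users/joe/testblock
export BENCH_TENANTID=1   # optional, defaults to "1"
go test -run=^$ -bench=BenchmarkBackendBlockTraceQL ./tempodb/encoding/vparquet4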


@ -256,7 +256,7 @@ func (w *walBlockFlush) rowIterator() (*rowIterator, error) {
pf := file.parquetFile
idx, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
idx, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
r := parquet.NewReader(pf)
return newRowIterator(r, file, w.ids.EntriesSortedByID(), idx), nil
}