tempo/tempodb/encoding/vparquet2/block_iterator.go
Joe Elliott c1f6280dd1 Do less Nexting (#4753)
* this is garbage

Signed-off-by: Joe Elliott <number101010@gmail.com>

* filtery stuff

Signed-off-by: Joe Elliott <number101010@gmail.com>

* fix

Signed-off-by: Joe Elliott <number101010@gmail.com>

* max def everywhere

Signed-off-by: Joe Elliott <number101010@gmail.com>

* clean up benches

Signed-off-by: Joe Elliott <number101010@gmail.com>

* clean up

Signed-off-by: Joe Elliott <number101010@gmail.com>

* remove vendor changes

Signed-off-by: Joe Elliott <number101010@gmail.com>

* changelog

Signed-off-by: Joe Elliott <number101010@gmail.com>

* add details about bench env vars

Signed-off-by: Joe Elliott <number101010@gmail.com>

---------

Signed-off-by: Joe Elliott <number101010@gmail.com>
2025-02-28 08:08:18 -05:00

88 lines
2.2 KiB
Go

package vparquet2
import (
"context"
"errors"
"fmt"
"io"
"github.com/parquet-go/parquet-go"
tempo_io "github.com/grafana/tempo/pkg/io"
"github.com/grafana/tempo/pkg/parquetquery"
"github.com/grafana/tempo/tempodb/encoding/common"
)
// open wraps the block's data file in a buffered reader and opens it as a
// parquet file with bloom filters and the page index skipped.
//
// It returns the opened file together with a row reader built on the Trace
// schema. If parquet.OpenFile fails, its error is returned unchanged and both
// other results are nil.
func (b *backendBlock) open(ctx context.Context) (*parquet.File, *parquet.Reader, error) { //nolint:all //deprecated
	// Together these two values give the 128 MB of memory buffering.
	const (
		bufferSize  = 2 * 1024 * 1024
		bufferCount = 64
	)

	size := int64(b.meta.Size_)
	backendReader := NewBackendReaderAt(ctx, b.r, DataFileName, b.meta)
	buffered := tempo_io.NewBufferedReaderAt(backendReader, size, bufferSize, bufferCount)

	file, err := parquet.OpenFile(buffered, size, parquet.SkipBloomFilters(true), parquet.SkipPageIndex(true))
	if err != nil {
		return nil, nil, err
	}

	return file, parquet.NewReader(file, parquet.SchemaOf(&Trace{})), nil
}
// rawIter opens the block and returns an iterator over its raw parquet rows.
// The iterator draws the rows it reads into from pool.
//
// It returns an error if the block cannot be opened, or if the trace ID column
// cannot be located in the parquet file. In the latter case the reader that
// was just opened is closed before returning.
func (b *backendBlock) rawIter(ctx context.Context, pool *rowPool) (*rawIterator, error) {
	pf, r, err := b.open(ctx)
	if err != nil {
		return nil, err
	}

	traceIDIndex, _, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
	if traceIDIndex < 0 {
		// No iterator is handed back on this path, so nothing else would ever
		// close the reader. The close error is dropped in favour of the more
		// useful "column not found" error below.
		_ = r.Close()
		return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
	}

	// Keyed fields keep this construction correct even if the struct's field
	// order changes.
	return &rawIterator{
		blockID:      b.meta.BlockID.String(),
		r:            r,
		traceIDIndex: traceIDIndex,
		pool:         pool,
	}, nil
}
// rawIterator reads a block's parquet file one row at a time, returning each
// row together with the trace ID found in it.
type rawIterator struct {
// blockID is the block's ID in string form; Next includes it in error messages.
blockID string
// r is the parquet reader that rows are read from.
r *parquet.Reader //nolint:all //deprecated
// traceIDIndex is the column index compared against each value's Column()
// to locate the trace ID within a row.
traceIDIndex int
// pool supplies the row that each call to Next reads into.
pool *rowPool
}
// Compile-time check that *rawIterator satisfies the RawIterator interface.
var _ RawIterator = (*rawIterator)(nil)
// getTraceID scans r for the first value whose column index matches the trace
// ID column and returns that value's bytes. It returns nil when no value in
// the row belongs to that column.
func (i *rawIterator) getTraceID(r parquet.Row) common.ID {
	for idx := range r {
		if r[idx].Column() != i.traceIDIndex {
			continue
		}

		// Important - the clone is a detached copy, so the returned ID lives
		// outside the pool.
		detached := r[idx].Clone()
		return detached.ByteArray()
	}
	return nil
}
// Next reads a single row from the block into a row taken from the pool.
//
// When a row is read it returns the row's trace ID, the row itself and a nil
// error. When nothing is read and the reader reports io.EOF or no error at
// all, it returns all nils. Any other error is wrapped with the block ID and
// returned.
func (i *rawIterator) Next(context.Context) (common.ID, parquet.Row, error) {
	buf := []parquet.Row{i.pool.Get()}
	read, err := i.r.ReadRows(buf)

	switch {
	case read > 0:
		// Take the row from buf only after ReadRows has filled it. A row that
		// was read wins over any error reported alongside it.
		row := buf[0]
		return i.getTraceID(row), row, nil
	case err == nil, errors.Is(err, io.EOF):
		return nil, nil, nil
	default:
		return nil, nil, fmt.Errorf("error iterating through block %s: %w", i.blockID, err)
	}
}
// peekNextID is not supported by rawIterator: it always returns a nil ID and
// common.ErrUnsupported.
func (i *rawIterator) peekNextID(context.Context) (common.ID, error) { // nolint:unused // this is required to satisfy the bookmarkIterator interface
return nil, common.ErrUnsupported
}
// Close closes the underlying parquet reader. Close has no return value, so
// whatever the reader's Close returns is discarded.
func (i *rawIterator) Close() {
	_ = i.r.Close()
}