blob: b8f35b143ed01cc5baded1d2ba432b0505b15f90 [file] [log] [blame]
Austin Engcc2516a2023-10-17 20:57:54 +00001// Copyright 2021 The Dawn & Tint Authors
Ben Claytone8177da2021-06-01 15:41:11 +00002//
Austin Engcc2516a2023-10-17 20:57:54 +00003// Redistribution and use in source and binary forms, with or without
4// modification, are permitted provided that the following conditions are met:
Ben Claytone8177da2021-06-01 15:41:11 +00005//
Austin Engcc2516a2023-10-17 20:57:54 +00006// 1. Redistributions of source code must retain the above copyright notice, this
7// list of conditions and the following disclaimer.
Ben Claytone8177da2021-06-01 15:41:11 +00008//
Austin Engcc2516a2023-10-17 20:57:54 +00009// 2. Redistributions in binary form must reproduce the above copyright notice,
10// this list of conditions and the following disclaimer in the documentation
11// and/or other materials provided with the distribution.
12//
13// 3. Neither the name of the copyright holder nor the names of its
14// contributors may be used to endorse or promote products derived from
15// this software without specific prior written permission.
16//
17// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
20// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
21// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
23// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
24// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
25// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Ben Claytone8177da2021-06-01 15:41:11 +000027
28// Package lut provides a look up table, which compresses indexed data
29package lut
30
31import (
32 "sort"
Ben Claytone8177da2021-06-01 15:41:11 +000033)
34
35// LUT is a look up table.
36// The table holds a number of items that are stored in a linear list.
Ben Claytoneb2f95e2023-08-09 17:29:31 +000037type LUT[T comparable] interface {
Ben Claytone8177da2021-06-01 15:41:11 +000038 // Add adds a sequence of items to the table.
Ben Claytone8177da2021-06-01 15:41:11 +000039 // The sequence of items stored at [offset, offset+N), where N is the
40 // number of items added will remain equal, even after calling Compact().
Ben Claytoneb2f95e2023-08-09 17:29:31 +000041 // Returns a pointer to the start index in the list.
42 Add(items []T) *int
Ben Claytone8177da2021-06-01 15:41:11 +000043 // Compact reorders the table items so that the table storage is compacted
44 // by shuffling data around and de-duplicating sequences of common data.
45 // Each originally added sequence is preserved in the resulting table, with
46 // the same contiguous ordering, but with a potentially different offset.
47 // Heuristics are used to shorten the table length, by exploiting common
48 // subsequences, and removing duplicate sequences.
49 // Note that shortest common superstring is NP-hard, so heuristics are used.
50 // Compact updates pointers returned by Add().
Ben Claytoneb2f95e2023-08-09 17:29:31 +000051 Compact() []T
Ben Claytone8177da2021-06-01 15:41:11 +000052}
53
54// New returns a new look up table
Ben Claytoneb2f95e2023-08-09 17:29:31 +000055func New[T comparable]() LUT[T] {
56 return &lut[T]{storage: []T{}}
Ben Claytone8177da2021-06-01 15:41:11 +000057}
58
59// A sequence represents a span of entries in the table
60type sequence struct {
61 offset *int // Pointer to the start index of the sequence
62 count int // Length of the sequence
63}
64
65// lut implements LUT
Ben Claytoneb2f95e2023-08-09 17:29:31 +000066type lut[T comparable] struct {
67 storage []T // The storage
Ben Claytone8177da2021-06-01 15:41:11 +000068 sequences []sequence // The entries in the LUT
69}
70
Ben Claytoneb2f95e2023-08-09 17:29:31 +000071func (t *lut[T]) Add(items []T) *int {
72 if len(items) == 0 {
73 return nil
74 }
75
76 offset := len(t.storage)
77 count := len(items)
78 t.storage = append(t.storage, items...)
Ben Claytone8177da2021-06-01 15:41:11 +000079 offsetPtr := &offset
80 t.sequences = append(t.sequences, sequence{offsetPtr, count})
81 return offsetPtr
82}
83
Ben Claytoneb2f95e2023-08-09 17:29:31 +000084// match describes a sequence that can be placed.
85type match struct {
86 dst int // destination offset
87 src sequence // source sequence
88 len int // number of items that matched
89 idx int // sequence index
90}
91
92func (t lut[T]) Compact() []T {
Ben Claytone8177da2021-06-01 15:41:11 +000093 // Generate int32 identifiers for each unique item in the table.
94 // We use these to compare items instead of comparing the real data as this
95 // function is comparison-heavy, and integer compares are cheap.
96 srcIDs := t.itemIDs()
97 dstIDs := make([]int32, len(srcIDs))
98
99 // Make a copy the data held in the table, use the copy as the source, and
100 // t.storage as the destination.
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000101 srcData := make([]T, len(t.storage))
102 copy(srcData, t.storage)
Ben Claytone8177da2021-06-01 15:41:11 +0000103 dstData := t.storage
104
105 // Sort all the sequences by length, with the largest first.
106 // This helps 'seed' the compacted form with the largest items first.
107 // This can improve the compaction as small sequences can pack into larger,
108 // placed items.
Ben Clayton6be02b62022-11-16 21:30:13 +0000109 sort.SliceStable(t.sequences, func(i, j int) bool {
Ben Claytone8177da2021-06-01 15:41:11 +0000110 return t.sequences[i].count > t.sequences[j].count
111 })
112
113 // unplaced is the list of sequences that have not yet been placed.
114 // All sequences are initially unplaced.
115 unplaced := make([]sequence, len(t.sequences))
116 copy(unplaced, t.sequences)
117
118 // placed is the list of sequences that have been placed.
119 // Nothing is initially placed.
120 placed := make([]sequence, 0, len(t.sequences))
121
122 // remove removes the sequence in unplaced with the index i.
123 remove := func(i int) {
124 placed = append(placed, unplaced[i])
125 if i > 0 {
126 if i < len(unplaced)-1 {
127 copy(unplaced[i:], unplaced[i+1:])
128 }
129 unplaced = unplaced[:len(unplaced)-1]
130 } else {
131 unplaced = unplaced[1:]
132 }
133 }
134
135 // cp copies data from [srcOffset:srcOffset+count] to [dstOffset:dstOffset+count].
136 cp := func(dstOffset, srcOffset, count int) {
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000137 copy(dstData[dstOffset:dstOffset+count], srcData[srcOffset:srcOffset+count])
138 copy(dstIDs[dstOffset:dstOffset+count], srcIDs[srcOffset:srcOffset+count])
Ben Claytone8177da2021-06-01 15:41:11 +0000139 }
140
141 // number of items that have been placed.
142 newSize := 0
143
144 // While there's sequences to place...
145 for len(unplaced) > 0 {
146 // Place the next largest, unplaced sequence at the end of the new list
147 cp(newSize, *unplaced[0].offset, unplaced[0].count)
148 *unplaced[0].offset = newSize
149 newSize += unplaced[0].count
150 remove(0)
151
152 for {
153 // Look for the sequence with the longest match against the
154 // currently placed data. Any mismatches with currently placed data
155 // will nullify the match. The head or tail of this sequence may
156 // extend the currently placed data.
157 best := match{}
158
159 // For each unplaced sequence...
160 for i := 0; i < len(unplaced); i++ {
161 seq := unplaced[i]
162
163 if best.len >= seq.count {
164 // The best match is already at least as long as this
165 // sequence and sequences are sorted by size, so best cannot
166 // be beaten. Stop searching.
167 break
168 }
169
170 // Perform a full sweep from left to right, scoring the match...
171 for shift := -seq.count + 1; shift < newSize; shift++ {
172 dstS := max(shift, 0)
173 dstE := min(shift+seq.count, newSize)
174 count := dstE - dstS
175 srcS := *seq.offset - min(shift, 0)
176 srcE := srcS + count
177
178 if best.len < count {
179 if equal(srcIDs[srcS:srcE], dstIDs[dstS:dstE]) {
180 best = match{shift, seq, count, i}
181 }
182 }
183 }
184 }
185
186 if best.src.offset == nil {
187 // Nothing matched. Not even one element.
188 // Resort to placing the next largest sequence at the end.
189 break
190 }
191
192 if best.dst < 0 {
193 // Best match wants to place the sequence to the left of the
194 // current output. We have to shuffle everything...
195 n := -best.dst
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000196 copy(dstData[n:n+newSize], dstData)
Ben Claytone8177da2021-06-01 15:41:11 +0000197 copy(dstIDs[n:n+newSize], dstIDs)
198 newSize += n
199 best.dst = 0
200 for _, p := range placed {
201 *p.offset += n
202 }
203 }
204
205 // Place the best matching sequence.
206 cp(best.dst, *best.src.offset, best.src.count)
207 newSize = max(newSize, best.dst+best.src.count)
208 *best.src.offset = best.dst
209 remove(best.idx)
210 }
211 }
212
213 // Shrink the output buffer to the new size.
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000214 dstData = dstData[:newSize]
Ben Claytone8177da2021-06-01 15:41:11 +0000215
216 // All done.
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000217 return dstData
Ben Claytone8177da2021-06-01 15:41:11 +0000218}
219
220// Generate a set of identifiers for all the unique items in storage
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000221func (t lut[T]) itemIDs() []int32 {
222 storageSize := len(t.storage)
Ben Claytone8177da2021-06-01 15:41:11 +0000223 keys := make([]int32, storageSize)
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000224 dataToKey := map[T]int32{}
Ben Claytone8177da2021-06-01 15:41:11 +0000225 for i := 0; i < storageSize; i++ {
Ben Claytoneb2f95e2023-08-09 17:29:31 +0000226 data := t.storage[i]
Ben Claytone8177da2021-06-01 15:41:11 +0000227 key, found := dataToKey[data]
228 if !found {
229 key = int32(len(dataToKey))
230 dataToKey[data] = key
231 }
232 keys[i] = key
233 }
234 return keys
235}
236
237func max(a, b int) int {
238 if a < b {
239 return b
240 }
241 return a
242}
243
244func min(a, b int) int {
245 if a > b {
246 return b
247 }
248 return a
249}
250
251func equal(a, b []int32) bool {
252 for i, v := range a {
253 if b[i] != v {
254 return false
255 }
256 }
257 return true
258}