Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 1 | // Copyright 2021 The Dawn & Tint Authors |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 2 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 3 | // Redistribution and use in source and binary forms, with or without |
| 4 | // modification, are permitted provided that the following conditions are met: |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 5 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 6 | // 1. Redistributions of source code must retain the above copyright notice, this |
| 7 | // list of conditions and the following disclaimer. |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 8 | // |
Austin Eng | cc2516a | 2023-10-17 20:57:54 +0000 | [diff] [blame] | 9 | // 2. Redistributions in binary form must reproduce the above copyright notice, |
| 10 | // this list of conditions and the following disclaimer in the documentation |
| 11 | // and/or other materials provided with the distribution. |
| 12 | // |
| 13 | // 3. Neither the name of the copyright holder nor the names of its |
| 14 | // contributors may be used to endorse or promote products derived from |
| 15 | // this software without specific prior written permission. |
| 16 | // |
| 17 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
| 20 | // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE |
| 21 | // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| 22 | // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
| 23 | // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
| 24 | // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
| 25 | // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
| 26 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 27 | |
| 28 | // Package lut provides a look up table, which compresses indexed data |
| 29 | package lut |
| 30 | |
| 31 | import ( |
| 32 | "sort" |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 33 | ) |
| 34 | |
| 35 | // LUT is a look up table. |
| 36 | // The table holds a number of items that are stored in a linear list. |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 37 | type LUT[T comparable] interface { |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 38 | // Add adds a sequence of items to the table. |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 39 | // The sequence of items stored at [offset, offset+N), where N is the |
| 40 | // number of items added will remain equal, even after calling Compact(). |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 41 | // Returns a pointer to the start index in the list. |
| 42 | Add(items []T) *int |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 43 | // Compact reorders the table items so that the table storage is compacted |
| 44 | // by shuffling data around and de-duplicating sequences of common data. |
| 45 | // Each originally added sequence is preserved in the resulting table, with |
| 46 | // the same contiguous ordering, but with a potentially different offset. |
| 47 | // Heuristics are used to shorten the table length, by exploiting common |
| 48 | // subsequences, and removing duplicate sequences. |
| 49 | // Note that shortest common superstring is NP-hard, so heuristics are used. |
| 50 | // Compact updates pointers returned by Add(). |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 51 | Compact() []T |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 52 | } |
| 53 | |
| 54 | // New returns a new look up table |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 55 | func New[T comparable]() LUT[T] { |
| 56 | return &lut[T]{storage: []T{}} |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 57 | } |
| 58 | |
| 59 | // A sequence represents a span of entries in the table |
| 60 | type sequence struct { |
| 61 | offset *int // Pointer to the start index of the sequence |
| 62 | count int // Length of the sequence |
| 63 | } |
| 64 | |
| 65 | // lut implements LUT |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 66 | type lut[T comparable] struct { |
| 67 | storage []T // The storage |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 68 | sequences []sequence // The entries in the LUT |
| 69 | } |
| 70 | |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 71 | func (t *lut[T]) Add(items []T) *int { |
| 72 | if len(items) == 0 { |
| 73 | return nil |
| 74 | } |
| 75 | |
| 76 | offset := len(t.storage) |
| 77 | count := len(items) |
| 78 | t.storage = append(t.storage, items...) |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 79 | offsetPtr := &offset |
| 80 | t.sequences = append(t.sequences, sequence{offsetPtr, count}) |
| 81 | return offsetPtr |
| 82 | } |
| 83 | |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 84 | // match describes a sequence that can be placed. |
| 85 | type match struct { |
| 86 | dst int // destination offset |
| 87 | src sequence // source sequence |
| 88 | len int // number of items that matched |
| 89 | idx int // sequence index |
| 90 | } |
| 91 | |
| 92 | func (t lut[T]) Compact() []T { |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 93 | // Generate int32 identifiers for each unique item in the table. |
| 94 | // We use these to compare items instead of comparing the real data as this |
| 95 | // function is comparison-heavy, and integer compares are cheap. |
| 96 | srcIDs := t.itemIDs() |
| 97 | dstIDs := make([]int32, len(srcIDs)) |
| 98 | |
| 99 | // Make a copy the data held in the table, use the copy as the source, and |
| 100 | // t.storage as the destination. |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 101 | srcData := make([]T, len(t.storage)) |
| 102 | copy(srcData, t.storage) |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 103 | dstData := t.storage |
| 104 | |
| 105 | // Sort all the sequences by length, with the largest first. |
| 106 | // This helps 'seed' the compacted form with the largest items first. |
| 107 | // This can improve the compaction as small sequences can pack into larger, |
| 108 | // placed items. |
Ben Clayton | 6be02b6 | 2022-11-16 21:30:13 +0000 | [diff] [blame] | 109 | sort.SliceStable(t.sequences, func(i, j int) bool { |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 110 | return t.sequences[i].count > t.sequences[j].count |
| 111 | }) |
| 112 | |
| 113 | // unplaced is the list of sequences that have not yet been placed. |
| 114 | // All sequences are initially unplaced. |
| 115 | unplaced := make([]sequence, len(t.sequences)) |
| 116 | copy(unplaced, t.sequences) |
| 117 | |
| 118 | // placed is the list of sequences that have been placed. |
| 119 | // Nothing is initially placed. |
| 120 | placed := make([]sequence, 0, len(t.sequences)) |
| 121 | |
| 122 | // remove removes the sequence in unplaced with the index i. |
| 123 | remove := func(i int) { |
| 124 | placed = append(placed, unplaced[i]) |
| 125 | if i > 0 { |
| 126 | if i < len(unplaced)-1 { |
| 127 | copy(unplaced[i:], unplaced[i+1:]) |
| 128 | } |
| 129 | unplaced = unplaced[:len(unplaced)-1] |
| 130 | } else { |
| 131 | unplaced = unplaced[1:] |
| 132 | } |
| 133 | } |
| 134 | |
| 135 | // cp copies data from [srcOffset:srcOffset+count] to [dstOffset:dstOffset+count]. |
| 136 | cp := func(dstOffset, srcOffset, count int) { |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 137 | copy(dstData[dstOffset:dstOffset+count], srcData[srcOffset:srcOffset+count]) |
| 138 | copy(dstIDs[dstOffset:dstOffset+count], srcIDs[srcOffset:srcOffset+count]) |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 139 | } |
| 140 | |
| 141 | // number of items that have been placed. |
| 142 | newSize := 0 |
| 143 | |
| 144 | // While there's sequences to place... |
| 145 | for len(unplaced) > 0 { |
| 146 | // Place the next largest, unplaced sequence at the end of the new list |
| 147 | cp(newSize, *unplaced[0].offset, unplaced[0].count) |
| 148 | *unplaced[0].offset = newSize |
| 149 | newSize += unplaced[0].count |
| 150 | remove(0) |
| 151 | |
| 152 | for { |
| 153 | // Look for the sequence with the longest match against the |
| 154 | // currently placed data. Any mismatches with currently placed data |
| 155 | // will nullify the match. The head or tail of this sequence may |
| 156 | // extend the currently placed data. |
| 157 | best := match{} |
| 158 | |
| 159 | // For each unplaced sequence... |
| 160 | for i := 0; i < len(unplaced); i++ { |
| 161 | seq := unplaced[i] |
| 162 | |
| 163 | if best.len >= seq.count { |
| 164 | // The best match is already at least as long as this |
| 165 | // sequence and sequences are sorted by size, so best cannot |
| 166 | // be beaten. Stop searching. |
| 167 | break |
| 168 | } |
| 169 | |
| 170 | // Perform a full sweep from left to right, scoring the match... |
| 171 | for shift := -seq.count + 1; shift < newSize; shift++ { |
| 172 | dstS := max(shift, 0) |
| 173 | dstE := min(shift+seq.count, newSize) |
| 174 | count := dstE - dstS |
| 175 | srcS := *seq.offset - min(shift, 0) |
| 176 | srcE := srcS + count |
| 177 | |
| 178 | if best.len < count { |
| 179 | if equal(srcIDs[srcS:srcE], dstIDs[dstS:dstE]) { |
| 180 | best = match{shift, seq, count, i} |
| 181 | } |
| 182 | } |
| 183 | } |
| 184 | } |
| 185 | |
| 186 | if best.src.offset == nil { |
| 187 | // Nothing matched. Not even one element. |
| 188 | // Resort to placing the next largest sequence at the end. |
| 189 | break |
| 190 | } |
| 191 | |
| 192 | if best.dst < 0 { |
| 193 | // Best match wants to place the sequence to the left of the |
| 194 | // current output. We have to shuffle everything... |
| 195 | n := -best.dst |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 196 | copy(dstData[n:n+newSize], dstData) |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 197 | copy(dstIDs[n:n+newSize], dstIDs) |
| 198 | newSize += n |
| 199 | best.dst = 0 |
| 200 | for _, p := range placed { |
| 201 | *p.offset += n |
| 202 | } |
| 203 | } |
| 204 | |
| 205 | // Place the best matching sequence. |
| 206 | cp(best.dst, *best.src.offset, best.src.count) |
| 207 | newSize = max(newSize, best.dst+best.src.count) |
| 208 | *best.src.offset = best.dst |
| 209 | remove(best.idx) |
| 210 | } |
| 211 | } |
| 212 | |
| 213 | // Shrink the output buffer to the new size. |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 214 | dstData = dstData[:newSize] |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 215 | |
| 216 | // All done. |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 217 | return dstData |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 218 | } |
| 219 | |
| 220 | // Generate a set of identifiers for all the unique items in storage |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 221 | func (t lut[T]) itemIDs() []int32 { |
| 222 | storageSize := len(t.storage) |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 223 | keys := make([]int32, storageSize) |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 224 | dataToKey := map[T]int32{} |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 225 | for i := 0; i < storageSize; i++ { |
Ben Clayton | eb2f95e | 2023-08-09 17:29:31 +0000 | [diff] [blame] | 226 | data := t.storage[i] |
Ben Clayton | e8177da | 2021-06-01 15:41:11 +0000 | [diff] [blame] | 227 | key, found := dataToKey[data] |
| 228 | if !found { |
| 229 | key = int32(len(dataToKey)) |
| 230 | dataToKey[data] = key |
| 231 | } |
| 232 | keys[i] = key |
| 233 | } |
| 234 | return keys |
| 235 | } |
| 236 | |
| 237 | func max(a, b int) int { |
| 238 | if a < b { |
| 239 | return b |
| 240 | } |
| 241 | return a |
| 242 | } |
| 243 | |
| 244 | func min(a, b int) int { |
| 245 | if a > b { |
| 246 | return b |
| 247 | } |
| 248 | return a |
| 249 | } |
| 250 | |
| 251 | func equal(a, b []int32) bool { |
| 252 | for i, v := range a { |
| 253 | if b[i] != v { |
| 254 | return false |
| 255 | } |
| 256 | } |
| 257 | return true |
| 258 | } |