|  | // Copyright 2021 The Tint Authors. | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     http://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | // Package lut provides a look up table, which compresses indexed data | 
|  | package lut | 
|  |  | 
|  | import ( | 
|  | "sort" | 
|  |  | 
|  | "dawn.googlesource.com/dawn/tools/src/list" | 
|  | ) | 
|  |  | 
|  | // LUT is a look up table. | 
|  | // The table holds a number of items that are stored in a linear list. | 
|  | type LUT interface { | 
|  | // Add adds a sequence of items to the table. | 
|  | // items can be a single element, a slice of element, or a List of element. | 
|  | // Returns a pointer to the offset of the first item in the table's list. | 
|  | // The sequence of items stored at [offset, offset+N), where N is the | 
|  | // number of items added will remain equal, even after calling Compact(). | 
|  | Add(items interface{}) *int | 
|  | // Compact reorders the table items so that the table storage is compacted | 
|  | // by shuffling data around and de-duplicating sequences of common data. | 
|  | // Each originally added sequence is preserved in the resulting table, with | 
|  | // the same contiguous ordering, but with a potentially different offset. | 
|  | // Heuristics are used to shorten the table length, by exploiting common | 
|  | // subsequences, and removing duplicate sequences. | 
|  | // Note that shortest common superstring is NP-hard, so heuristics are used. | 
|  | // Compact updates pointers returned by Add(). | 
|  | Compact() | 
|  | } | 
|  |  | 
|  | // New returns a new look up table | 
|  | func New(storage list.List) LUT { | 
|  | return &lut{storage: storage} | 
|  | } | 
|  |  | 
|  | // A sequence represents a span of entries in the table | 
|  | type sequence struct { | 
|  | offset *int // Pointer to the start index of the sequence | 
|  | count  int  // Length of the sequence | 
|  | } | 
|  |  | 
|  | // lut implements LUT | 
|  | type lut struct { | 
|  | storage   list.List  // The List that backs this LUT | 
|  | sequences []sequence // The entries in the LUT | 
|  | } | 
|  |  | 
|  | func (t *lut) Add(items interface{}) *int { | 
|  | offset := t.storage.Count() | 
|  | t.storage.Append(items) | 
|  | count := t.storage.Count() - offset | 
|  | offsetPtr := &offset | 
|  | t.sequences = append(t.sequences, sequence{offsetPtr, count}) | 
|  | return offsetPtr | 
|  | } | 
|  |  | 
|  | func (t lut) Compact() { | 
|  | // Generate int32 identifiers for each unique item in the table. | 
|  | // We use these to compare items instead of comparing the real data as this | 
|  | // function is comparison-heavy, and integer compares are cheap. | 
|  | srcIDs := t.itemIDs() | 
|  | dstIDs := make([]int32, len(srcIDs)) | 
|  |  | 
|  | // Make a copy the data held in the table, use the copy as the source, and | 
|  | // t.storage as the destination. | 
|  | srcData := list.Copy(t.storage) | 
|  | dstData := t.storage | 
|  |  | 
|  | // Sort all the sequences by length, with the largest first. | 
|  | // This helps 'seed' the compacted form with the largest items first. | 
|  | // This can improve the compaction as small sequences can pack into larger, | 
|  | // placed items. | 
|  | sort.Slice(t.sequences, func(i, j int) bool { | 
|  | return t.sequences[i].count > t.sequences[j].count | 
|  | }) | 
|  |  | 
|  | // unplaced is the list of sequences that have not yet been placed. | 
|  | // All sequences are initially unplaced. | 
|  | unplaced := make([]sequence, len(t.sequences)) | 
|  | copy(unplaced, t.sequences) | 
|  |  | 
|  | // placed is the list of sequences that have been placed. | 
|  | // Nothing is initially placed. | 
|  | placed := make([]sequence, 0, len(t.sequences)) | 
|  |  | 
|  | // remove removes the sequence in unplaced with the index i. | 
|  | remove := func(i int) { | 
|  | placed = append(placed, unplaced[i]) | 
|  | if i > 0 { | 
|  | if i < len(unplaced)-1 { | 
|  | copy(unplaced[i:], unplaced[i+1:]) | 
|  | } | 
|  | unplaced = unplaced[:len(unplaced)-1] | 
|  | } else { | 
|  | unplaced = unplaced[1:] | 
|  | } | 
|  | } | 
|  |  | 
|  | // cp copies data from [srcOffset:srcOffset+count] to [dstOffset:dstOffset+count]. | 
|  | cp := func(dstOffset, srcOffset, count int) { | 
|  | dstData.CopyFrom(srcData, dstOffset, srcOffset, count) | 
|  | copy( | 
|  | dstIDs[dstOffset:dstOffset+count], | 
|  | srcIDs[srcOffset:srcOffset+count], | 
|  | ) | 
|  | } | 
|  |  | 
|  | // match describes a sequence that can be placed. | 
|  | type match struct { | 
|  | dst int      // destination offset | 
|  | src sequence // source sequence | 
|  | len int      // number of items that matched | 
|  | idx int      // sequence index | 
|  | } | 
|  |  | 
|  | // number of items that have been placed. | 
|  | newSize := 0 | 
|  |  | 
|  | // While there's sequences to place... | 
|  | for len(unplaced) > 0 { | 
|  | // Place the next largest, unplaced sequence at the end of the new list | 
|  | cp(newSize, *unplaced[0].offset, unplaced[0].count) | 
|  | *unplaced[0].offset = newSize | 
|  | newSize += unplaced[0].count | 
|  | remove(0) | 
|  |  | 
|  | for { | 
|  | // Look for the sequence with the longest match against the | 
|  | // currently placed data. Any mismatches with currently placed data | 
|  | // will nullify the match. The head or tail of this sequence may | 
|  | // extend the currently placed data. | 
|  | best := match{} | 
|  |  | 
|  | // For each unplaced sequence... | 
|  | for i := 0; i < len(unplaced); i++ { | 
|  | seq := unplaced[i] | 
|  |  | 
|  | if best.len >= seq.count { | 
|  | // The best match is already at least as long as this | 
|  | // sequence and sequences are sorted by size, so best cannot | 
|  | // be beaten. Stop searching. | 
|  | break | 
|  | } | 
|  |  | 
|  | // Perform a full sweep from left to right, scoring the match... | 
|  | for shift := -seq.count + 1; shift < newSize; shift++ { | 
|  | dstS := max(shift, 0) | 
|  | dstE := min(shift+seq.count, newSize) | 
|  | count := dstE - dstS | 
|  | srcS := *seq.offset - min(shift, 0) | 
|  | srcE := srcS + count | 
|  |  | 
|  | if best.len < count { | 
|  | if equal(srcIDs[srcS:srcE], dstIDs[dstS:dstE]) { | 
|  | best = match{shift, seq, count, i} | 
|  | } | 
|  | } | 
|  | } | 
|  | } | 
|  |  | 
|  | if best.src.offset == nil { | 
|  | // Nothing matched. Not even one element. | 
|  | // Resort to placing the next largest sequence at the end. | 
|  | break | 
|  | } | 
|  |  | 
|  | if best.dst < 0 { | 
|  | // Best match wants to place the sequence to the left of the | 
|  | // current output. We have to shuffle everything... | 
|  | n := -best.dst | 
|  | dstData.Copy(n, 0, newSize) | 
|  | copy(dstIDs[n:n+newSize], dstIDs) | 
|  | newSize += n | 
|  | best.dst = 0 | 
|  | for _, p := range placed { | 
|  | *p.offset += n | 
|  | } | 
|  | } | 
|  |  | 
|  | // Place the best matching sequence. | 
|  | cp(best.dst, *best.src.offset, best.src.count) | 
|  | newSize = max(newSize, best.dst+best.src.count) | 
|  | *best.src.offset = best.dst | 
|  | remove(best.idx) | 
|  | } | 
|  | } | 
|  |  | 
|  | // Shrink the output buffer to the new size. | 
|  | dstData.Resize(newSize) | 
|  |  | 
|  | // All done. | 
|  | } | 
|  |  | 
|  | // Generate a set of identifiers for all the unique items in storage | 
|  | func (t lut) itemIDs() []int32 { | 
|  | storageSize := t.storage.Count() | 
|  | keys := make([]int32, storageSize) | 
|  | dataToKey := map[interface{}]int32{} | 
|  | for i := 0; i < storageSize; i++ { | 
|  | data := t.storage.Get(i) | 
|  | key, found := dataToKey[data] | 
|  | if !found { | 
|  | key = int32(len(dataToKey)) | 
|  | dataToKey[data] = key | 
|  | } | 
|  | keys[i] = key | 
|  | } | 
|  | return keys | 
|  | } | 
|  |  | 
|  | func max(a, b int) int { | 
|  | if a < b { | 
|  | return b | 
|  | } | 
|  | return a | 
|  | } | 
|  |  | 
|  | func min(a, b int) int { | 
|  | if a > b { | 
|  | return b | 
|  | } | 
|  | return a | 
|  | } | 
|  |  | 
|  | func equal(a, b []int32) bool { | 
|  | for i, v := range a { | 
|  | if b[i] != v { | 
|  | return false | 
|  | } | 
|  | } | 
|  | return true | 
|  | } |