| // Copyright 2021 The Tint Authors. |
| // |
| // Licensed under the Apache License, Version 2.0 (the "License"); |
| // you may not use this file except in compliance with the License. |
| // You may obtain a copy of the License at |
| // |
| // http://www.apache.org/licenses/LICENSE-2.0 |
| // |
| // Unless required by applicable law or agreed to in writing, software |
| // distributed under the License is distributed on an "AS IS" BASIS, |
| // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| // See the License for the specific language governing permissions and |
| // limitations under the License. |
| |
| #include "src/tint/fuzzers/tint_regex_fuzzer/wgsl_mutator.h" |
| |
| #include <cassert> |
| #include <cstring> |
| #include <map> |
| #include <regex> |
| #include <string> |
| #include <utility> |
| #include <vector> |
| |
| #include "src/tint/fuzzers/random_generator.h" |
| |
| namespace tint::fuzzers::regex_fuzzer { |
| |
| WgslMutator::WgslMutator(RandomGenerator& generator) : generator_(generator) {} |
| |
| std::vector<size_t> WgslMutator::FindDelimiterIndices(const std::string& delimiter, |
| const std::string& wgsl_code) { |
| std::vector<size_t> result; |
| for (size_t pos = wgsl_code.find(delimiter, 0); pos != std::string::npos; |
| pos = wgsl_code.find(delimiter, pos + 1)) { |
| result.push_back(pos); |
| } |
| |
| return result; |
| } |
| |
| std::vector<std::pair<size_t, size_t>> WgslMutator::GetIdentifiers(const std::string& wgsl_code) { |
| std::vector<std::pair<size_t, size_t>> result; |
| |
| // This regular expression works by looking for a character that |
| // is not part of an identifier followed by a WGSL identifier, followed |
| // by a character which cannot be part of a WGSL identifer. The regex |
| // for the WGSL identifier is obtained from: |
| // https://www.w3.org/TR/WGSL/#identifiers. |
| std::regex wgsl_identifier_regex("[^a-zA-Z]([a-zA-Z][0-9a-zA-Z_]*)[^0-9a-zA-Z_]"); |
| |
| std::smatch match; |
| |
| std::string::const_iterator search_start(wgsl_code.cbegin()); |
| std::string prefix; |
| |
| while (regex_search(search_start, wgsl_code.cend(), match, wgsl_identifier_regex) == true) { |
| prefix += match.prefix(); |
| result.push_back(std::make_pair(prefix.size() + 1, match.str(1).size())); |
| prefix += match.str(0); |
| search_start = match.suffix().first; |
| } |
| return result; |
| } |
| |
| std::vector<std::pair<size_t, size_t>> WgslMutator::GetIntLiterals(const std::string& s) { |
| std::vector<std::pair<size_t, size_t>> result; |
| |
| // Looks for integer literals in decimal or hexadecimal form. |
| // Regex obtained here: https://www.w3.org/TR/WGSL/#literals |
| std::regex int_literal_regex("-?0x[0-9a-fA-F]+ | 0 | -?[1-9][0-9]*"); |
| std::regex uint_literal_regex("0x[0-9a-fA-F]+u | 0u | [1-9][0-9]*u"); |
| std::smatch match; |
| |
| std::string::const_iterator search_start(s.cbegin()); |
| std::string prefix = ""; |
| |
| while (regex_search(search_start, s.cend(), match, int_literal_regex) || |
| regex_search(search_start, s.cend(), match, uint_literal_regex)) { |
| prefix += match.prefix(); |
| result.push_back(std::make_pair(prefix.size() + 1, match.str(0).size() - 1)); |
| prefix += match.str(0); |
| search_start = match.suffix().first; |
| } |
| return result; |
| } |
| |
| size_t WgslMutator::FindClosingBrace(size_t opening_bracket_pos, const std::string& wgsl_code) { |
| size_t open_bracket_count = 1; |
| size_t pos = opening_bracket_pos + 1; |
| while (open_bracket_count >= 1 && pos < wgsl_code.size()) { |
| if (wgsl_code[pos] == '{') { |
| ++open_bracket_count; |
| } else if (wgsl_code[pos] == '}') { |
| --open_bracket_count; |
| } |
| ++pos; |
| } |
| return (pos == wgsl_code.size() && open_bracket_count >= 1) ? 0 : pos - 1; |
| } |
| |
| std::vector<std::pair<size_t, bool>> WgslMutator::GetFunctionBodyPositions( |
| const std::string& wgsl_code) { |
| // Finds all the functions with a non-void return value. |
| std::regex function_regex("fn[^a-zA-Z_0-9][^\\{]*\\{"); |
| std::vector<std::pair<size_t, bool>> result; |
| |
| auto functions_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), function_regex); |
| auto functions_end = std::sregex_iterator(); |
| |
| for (std::sregex_iterator i = functions_begin; i != functions_end; ++i) { |
| bool returns_value = i->str().find("->") != std::string::npos; |
| result.push_back( |
| {static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1), returns_value}); |
| } |
| return result; |
| } |
| |
| std::vector<size_t> WgslMutator::GetLoopBodyPositions(const std::string& wgsl_code) { |
| // Finds all loops. |
| std::regex loop_regex("[^a-zA-Z_0-9](for|while|loop)[^\\{]*\\{"); |
| std::vector<size_t> result; |
| |
| auto loops_begin = std::sregex_iterator(wgsl_code.begin(), wgsl_code.end(), loop_regex); |
| auto loops_end = std::sregex_iterator(); |
| |
| for (std::sregex_iterator i = loops_begin; i != loops_end; ++i) { |
| result.push_back(static_cast<size_t>(i->suffix().first - wgsl_code.cbegin() - 1)); |
| } |
| return result; |
| } |
| |
| bool WgslMutator::InsertReturnStatement(std::string& wgsl_code) { |
| std::vector<std::pair<size_t, bool>> function_body_positions = |
| GetFunctionBodyPositions(wgsl_code); |
| |
| // No function was found in wgsl_code. |
| if (function_body_positions.empty()) { |
| return false; |
| } |
| |
| // Pick a random function |
| auto function = generator_.GetRandomElement(function_body_positions); |
| |
| // Find the corresponding closing bracket for the function, and find a semi-colon within the |
| // function body. |
| size_t left_bracket_pos = function.first; |
| |
| size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); |
| |
| if (right_bracket_pos == 0) { |
| return false; |
| } |
| |
| std::vector<size_t> semicolon_positions; |
| for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1); pos < right_bracket_pos; |
| pos = wgsl_code.find(";", pos + 1)) { |
| semicolon_positions.push_back(pos); |
| } |
| |
| if (semicolon_positions.empty()) { |
| return false; |
| } |
| |
| std::string return_statement = "return"; |
| if (function.second) { |
| // The function returns a value. Get all identifiers and integer literals to use as |
| // potential return values. |
| std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code); |
| auto return_values = identifiers; |
| std::vector<std::pair<size_t, size_t>> int_literals = GetIntLiterals(wgsl_code); |
| return_values.insert(return_values.end(), int_literals.begin(), int_literals.end()); |
| std::pair<size_t, size_t> return_value = generator_.GetRandomElement(return_values); |
| return_statement += " " + wgsl_code.substr(return_value.first, return_value.second); |
| } |
| return_statement += ";"; |
| |
| // Insert the return statement immediately after the semicolon. |
| wgsl_code.insert(generator_.GetRandomElement(semicolon_positions) + 1, return_statement); |
| return true; |
| } |
| |
| bool WgslMutator::InsertBreakOrContinue(std::string& wgsl_code) { |
| std::vector<size_t> loop_body_positions = GetLoopBodyPositions(wgsl_code); |
| |
| // No loop was found in wgsl_code. |
| if (loop_body_positions.empty()) { |
| return false; |
| } |
| |
| // Pick a random loop's opening bracket, find the corresponding closing |
| // bracket, and find a semi-colon within the loop body. |
| size_t left_bracket_pos = generator_.GetRandomElement(loop_body_positions); |
| |
| size_t right_bracket_pos = FindClosingBrace(left_bracket_pos, wgsl_code); |
| |
| if (right_bracket_pos == 0) { |
| return false; |
| } |
| |
| std::vector<size_t> semicolon_positions; |
| for (size_t pos = wgsl_code.find(";", left_bracket_pos + 1); pos < right_bracket_pos; |
| pos = wgsl_code.find(";", pos + 1)) { |
| semicolon_positions.push_back(pos); |
| } |
| |
| if (semicolon_positions.empty()) { |
| return false; |
| } |
| |
| size_t semicolon_position = generator_.GetRandomElement(semicolon_positions); |
| |
| // Insert a break or continue immediately after the semicolon. |
| wgsl_code.insert(semicolon_position + 1, generator_.GetBool() ? "break;" : "continue;"); |
| return true; |
| } |
| |
| void WgslMutator::SwapIntervals(size_t idx1, |
| size_t reg1_len, |
| size_t idx2, |
| size_t reg2_len, |
| std::string& wgsl_code) { |
| std::string region_1 = wgsl_code.substr(idx1 + 1, reg1_len - 1); |
| |
| std::string region_2 = wgsl_code.substr(idx2 + 1, reg2_len - 1); |
| |
| // The second transformation is done first as it doesn't affect idx2. |
| wgsl_code.replace(idx2 + 1, region_2.size(), region_1); |
| |
| wgsl_code.replace(idx1 + 1, region_1.size(), region_2); |
| } |
| |
| void WgslMutator::DeleteInterval(size_t idx1, size_t reg_len, std::string& wgsl_code) { |
| wgsl_code.erase(idx1 + 1, reg_len - 1); |
| } |
| |
| void WgslMutator::DuplicateInterval(size_t idx1, |
| size_t reg1_len, |
| size_t idx2, |
| std::string& wgsl_code) { |
| std::string region = wgsl_code.substr(idx1 + 1, reg1_len - 1); |
| wgsl_code.insert(idx2 + 1, region); |
| } |
| |
| void WgslMutator::ReplaceRegion(size_t idx1, |
| size_t id1_len, |
| size_t idx2, |
| size_t id2_len, |
| std::string& wgsl_code) { |
| std::string region_1 = wgsl_code.substr(idx1, id1_len); |
| std::string region_2 = wgsl_code.substr(idx2, id2_len); |
| wgsl_code.replace(idx2, region_2.size(), region_1); |
| } |
| |
| void WgslMutator::ReplaceInterval(size_t start_index, |
| size_t length, |
| std::string replacement_text, |
| std::string& wgsl_code) { |
| std::string region_1 = wgsl_code.substr(start_index, length); |
| wgsl_code.replace(start_index, length, replacement_text); |
| } |
| |
| bool WgslMutator::SwapRandomIntervals(const std::string& delimiter, std::string& wgsl_code) { |
| std::vector<size_t> delimiter_positions = FindDelimiterIndices(delimiter, wgsl_code); |
| |
| // Need to have at least 3 indices. |
| if (delimiter_positions.size() < 3) { |
| return false; |
| } |
| |
| // Choose indices: |
| // interval_1_start < interval_1_end <= interval_2_start < interval_2_end |
| uint32_t interval_1_start = |
| generator_.GetUInt32(static_cast<uint32_t>(delimiter_positions.size()) - 2u); |
| uint32_t interval_1_end = generator_.GetUInt32( |
| interval_1_start + 1u, static_cast<uint32_t>(delimiter_positions.size()) - 1u); |
| uint32_t interval_2_start = generator_.GetUInt32( |
| interval_1_end, static_cast<uint32_t>(delimiter_positions.size()) - 1u); |
| uint32_t interval_2_end = generator_.GetUInt32( |
| interval_2_start + 1u, static_cast<uint32_t>(delimiter_positions.size())); |
| |
| SwapIntervals(delimiter_positions[interval_1_start], |
| delimiter_positions[interval_1_end] - delimiter_positions[interval_1_start], |
| delimiter_positions[interval_2_start], |
| delimiter_positions[interval_2_end] - delimiter_positions[interval_2_start], |
| wgsl_code); |
| |
| return true; |
| } |
| |
| bool WgslMutator::DeleteRandomInterval(const std::string& delimiter, std::string& wgsl_code) { |
| std::vector<size_t> delimiter_positions = FindDelimiterIndices(delimiter, wgsl_code); |
| |
| // Need to have at least 2 indices. |
| if (delimiter_positions.size() < 2) { |
| return false; |
| } |
| |
| uint32_t interval_start = |
| generator_.GetUInt32(static_cast<uint32_t>(delimiter_positions.size()) - 1u); |
| uint32_t interval_end = generator_.GetUInt32(interval_start + 1u, |
| static_cast<uint32_t>(delimiter_positions.size())); |
| |
| DeleteInterval(delimiter_positions[interval_start], |
| delimiter_positions[interval_end] - delimiter_positions[interval_start], |
| wgsl_code); |
| |
| return true; |
| } |
| |
| bool WgslMutator::DuplicateRandomInterval(const std::string& delimiter, std::string& wgsl_code) { |
| std::vector<size_t> delimiter_positions = FindDelimiterIndices(delimiter, wgsl_code); |
| |
| // Need to have at least 2 indices |
| if (delimiter_positions.size() < 2) { |
| return false; |
| } |
| |
| uint32_t interval_start = |
| generator_.GetUInt32(static_cast<uint32_t>(delimiter_positions.size()) - 1u); |
| uint32_t interval_end = generator_.GetUInt32(interval_start + 1u, |
| static_cast<uint32_t>(delimiter_positions.size())); |
| uint32_t duplication_point = |
| generator_.GetUInt32(static_cast<uint32_t>(delimiter_positions.size())); |
| |
| DuplicateInterval(delimiter_positions[interval_start], |
| delimiter_positions[interval_end] - delimiter_positions[interval_start], |
| delimiter_positions[duplication_point], wgsl_code); |
| |
| return true; |
| } |
| |
| bool WgslMutator::ReplaceRandomIdentifier(std::string& wgsl_code) { |
| std::vector<std::pair<size_t, size_t>> identifiers = GetIdentifiers(wgsl_code); |
| |
| // Need at least 2 identifiers |
| if (identifiers.size() < 2) { |
| return false; |
| } |
| |
| uint32_t id1_index = generator_.GetUInt32(static_cast<uint32_t>(identifiers.size())); |
| uint32_t id2_index = generator_.GetUInt32(static_cast<uint32_t>(identifiers.size())); |
| |
| // The two identifiers must be different |
| while (id1_index == id2_index) { |
| id2_index = generator_.GetUInt32(static_cast<uint32_t>(identifiers.size())); |
| } |
| |
| ReplaceRegion(identifiers[id1_index].first, identifiers[id1_index].second, |
| identifiers[id2_index].first, identifiers[id2_index].second, wgsl_code); |
| |
| return true; |
| } |
| |
| bool WgslMutator::ReplaceRandomIntLiteral(std::string& wgsl_code) { |
| std::vector<std::pair<size_t, size_t>> literals = GetIntLiterals(wgsl_code); |
| |
| // Need at least one integer literal |
| if (literals.size() < 1) { |
| return false; |
| } |
| |
| uint32_t literal_index = generator_.GetUInt32(static_cast<uint32_t>(literals.size())); |
| |
| // INT_MAX = 2147483647, INT_MIN = -2147483648 |
| std::vector<std::string> boundary_values = {"2147483647", "-2147483648", "1", |
| "-1", "0", "4294967295"}; |
| |
| uint32_t boundary_index = generator_.GetUInt32(static_cast<uint32_t>(boundary_values.size())); |
| |
| ReplaceInterval(literals[literal_index].first, literals[literal_index].second, |
| boundary_values[boundary_index], wgsl_code); |
| |
| return true; |
| } |
| |
| std::string WgslMutator::ChooseRandomReplacementForOperator(const std::string& existing_operator) { |
| // Operators are partitioned into three classes: assignment, expression and increment. The regex |
| // mutator will swap operators in the same class. The hypothesis is that this should exercise a |
| // number of type-safe swaps (e.g. changing += to *=), as well as some badly-typed yet |
| // interesting swaps (e.g. changing + to ^ when the operators are matrices), while avoiding |
| // making totally nonsensical replacements (such as changing ++ too /). |
| std::vector<std::string> assignment_operators{ |
| "=", "+=", "-=", "*=", "/=", "%=", "&=", "|=", "^=", "<<=", ">>="}; |
| std::vector<std::string> expression_operators{"+", "-", "*", "/", "%", "&&", "||", |
| "&", "|", "^", "<<", ">>", "<", ">", |
| "<=", ">=", "!", "!=", "~"}; |
| std::vector<std::string> increment_operators{"++", "--"}; |
| for (auto operators : {assignment_operators, expression_operators, increment_operators}) { |
| auto it = std::find(operators.begin(), operators.end(), existing_operator); |
| if (it != operators.end()) { |
| // The operator falls into this category, so select another operator from the category. |
| operators.erase(it); |
| return generator_.GetRandomElement(operators); |
| } |
| } |
| assert(false && "Unknown operator"); |
| return ""; |
| } |
| |
| bool WgslMutator::ReplaceRandomOperator(std::string& wgsl_code) { |
| // Choose an index into the code at random. |
| const uint32_t start_index = generator_.GetUInt32(static_cast<uint32_t>(wgsl_code.size())); |
| // Find the first operator occurrence from the chosen point, wrapping back to the start of the |
| // file if needed. |
| auto maybe_operator_occurrence = FindOperatorOccurrence(wgsl_code, start_index); |
| if (!maybe_operator_occurrence.has_value()) { |
| // It is unlikely that there will be *no* operators in the file, but if this is the case |
| // then this mutation cannot be applied. |
| return false; |
| } |
| std::string existing_operator = |
| wgsl_code.substr(maybe_operator_occurrence->first, maybe_operator_occurrence->second); |
| // Replace the identified operator with a randomly-chosen alternative. |
| wgsl_code.replace(maybe_operator_occurrence->first, maybe_operator_occurrence->second, |
| ChooseRandomReplacementForOperator(existing_operator)); |
| return true; |
| } |
| |
| std::optional<std::pair<uint32_t, uint32_t>> WgslMutator::FindOperatorOccurrence( |
| const std::string& wgsl_code, |
| uint32_t start_index) { |
| // Loops through the characters of the code in a wrap-around fashion, looking for the first |
| // encountered token that is a WGSL operator. |
| |
| for (size_t i = 0; i < wgsl_code.size(); i++) { |
| uint32_t current_index = static_cast<uint32_t>((start_index + i) % wgsl_code.size()); |
| |
| // To cater for multi-character operator tokens, get the three consecutive characters from |
| // the code string starting at the current index. Use null characters to account for the |
| // case where search has reached the end of the code string. |
| char first_character = wgsl_code[current_index]; |
| char second_character = |
| current_index == wgsl_code.size() - 1 ? '\0' : wgsl_code[current_index + 1]; |
| char third_character = |
| current_index >= wgsl_code.size() - 2 ? '\0' : wgsl_code[current_index + 2]; |
| |
| // This uses the extracted characters to match for the various WGSL operators. |
| switch (first_character) { |
| case '!': |
| case '^': |
| switch (second_character) { |
| case '=': |
| return {{current_index, 2}}; |
| default: |
| return {{current_index, 1}}; |
| } |
| case '|': |
| case '&': |
| case '+': |
| case '-': |
| if (second_character == first_character || second_character == '=') { |
| return {{current_index, 2}}; |
| } |
| return {{current_index, 1}}; |
| case '*': |
| case '/': |
| case '%': |
| switch (second_character) { |
| case '=': |
| return {{current_index, 2}}; |
| default: |
| return {{current_index, 1}}; |
| } |
| case '=': |
| if (second_character == '=') { |
| return {{current_index, 2}}; |
| } |
| return {{current_index, 1}}; |
| case '<': |
| case '>': |
| if (second_character == '=') { |
| return {{current_index, 2}}; |
| } |
| if (second_character == first_character) { |
| if (third_character == '=') { |
| return {{current_index, 3}}; |
| } |
| return {{current_index, 2}}; |
| } |
| return {{current_index, 1}}; |
| case '~': |
| return {{current_index, 1}}; |
| default: |
| break; |
| } |
| } |
| // No operator was found, so empty is returned. |
| return {}; |
| } |
| |
| } // namespace tint::fuzzers::regex_fuzzer |