diff --git a/Source/WebCore/contentextensions/Term.h b/Source/WebCore/contentextensions/Term.h
index 0aec5842c45b..b169f6d3d73c 100644
--- a/Source/WebCore/contentextensions/Term.h
+++ b/Source/WebCore/contentextensions/Term.h
@@ -78,6 +78,7 @@ class Term {
 
     // Group terms only.
     void extendGroupSubpattern(const Term&);
+    void startNewAlternative();
 
     void quantify(const AtomQuantifier&);
 
@@ -169,7 +170,14 @@ class Term {
     friend void add(Hasher&, const Term::CharacterSet&);
 
     struct Group {
-        Vector<Term> terms;
+        // One term sequence per branch of the disjunction; a group without '|' has exactly one.
+        Vector<Vector<Term>> alternatives;
+
+        // Both overloads expose the alternative currently being built (the last one), so that
+        // const and non-const callers observe the same sequence. Callers that need every
+        // branch iterate `alternatives` directly.
+        Vector<Term>& terms() { return alternatives.last(); }
+        const Vector<Term>& terms() const { return alternatives.last(); }
 
         friend bool operator==(const Group&, const Group&) = default;
     };
@@ -197,7 +205,7 @@ inline void add(Hasher& hasher, const Term::CharacterSet& characterSet)
 
 inline void add(Hasher& hasher, const Term::Group& group)
 {
-    add(hasher, group.terms);
+    add(hasher, group.alternatives);
 }
 
 inline void add(Hasher& hasher, const Term& term)
@@ -253,8 +261,12 @@ inline String Term::toString() const
     case TermType::Group: {
         StringBuilder builder;
         builder.append('(');
-        for (const Term& term : m_atomData.group.terms)
-            builder.append(term.toString());
+        for (unsigned a = 0; a < m_atomData.group.alternatives.size(); ++a) {
+            if (a)
+                builder.append('|');
+            for (const Term& term : m_atomData.group.alternatives[a])
+                builder.append(term.toString());
+        }
         builder.append(')');
         builder.append(quantifierToString(m_quantifier));
         return builder.toString();
@@ -294,6 +306,7 @@ inline Term::Term(GroupTermTag)
     : m_termType(TermType::Group)
 {
     new (NotNull, &m_atomData.group) Group();
+    m_atomData.group.alternatives.append(Vector<Term>());
 }
 
 inline Term::Term(EndOfLineAssertionTermTag)
@@ -371,7 +384,15 @@ inline void Term::extendGroupSubpattern(const Term& term)
     ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group);
     if (m_termType != TermType::Group)
         return;
-    m_atomData.group.terms.append(term);
+    m_atomData.group.alternatives.last().append(term);
+}
+
+inline void Term::startNewAlternative()
+{
+    ASSERT_WITH_SECURITY_IMPLICATION(m_termType == TermType::Group);
+    if (m_termType != TermType::Group)
+        return;
+    m_atomData.group.alternatives.append(Vector<Term>());
 }
 
 inline void Term::quantify(const AtomQuantifier& quantifier)
@@ -443,9 +460,18 @@ inline bool Term::matchesAtLeastOneCharacter() const
         return false;
 
     if (m_termType == TermType::Group) {
-        for (const Term& term : m_atomData.group.terms) {
-            if (term.matchesAtLeastOneCharacter())
-                return true;
+        // A sequence consumes a character as soon as one of its terms does, but a disjunction
+        // only guarantees it when every alternative does: (a|b*) can still match the empty string.
+        for (const auto& alternative : m_atomData.group.alternatives) {
+            bool alternativeConsumesCharacter = false;
+            for (const Term& term : alternative) {
+                if (term.matchesAtLeastOneCharacter()) {
+                    alternativeConsumesCharacter = true;
+                    break;
+                }
+            }
+            if (!alternativeConsumesCharacter)
+                return false;
         }
-        return false;
+        return !m_atomData.group.alternatives.isEmpty();
     }
@@ -465,25 +491,22 @@ inline bool Term::isKnownToMatchAnyString() const
         return isUniversalTransition() && m_quantifier == AtomQuantifier::ZeroOrMore;
         break;
     case TermType::Group: {
-        // There are infinitely many ways to match anything with groups, we just handle simple cases
-        if (m_atomData.group.terms.size() != 1)
+        if (m_atomData.group.alternatives.size() != 1)
             return false;
 
-        const Term& firstTermInGroup = m_atomData.group.terms.first();
-        // -(.*) with any quantifier.
+        const auto& terms = m_atomData.group.alternatives.first();
+        if (terms.size() != 1)
+            return false;
+
+        const Term& firstTermInGroup = terms.first();
         if (firstTermInGroup.isKnownToMatchAnyString())
             return true;
 
         if (firstTermInGroup.isUniversalTransition()) {
-            // -(.)*, (.+)*, (.?)* etc.
             if (m_quantifier == AtomQuantifier::ZeroOrMore)
                 return true;
-
-            // -(.+)?.
             if (m_quantifier == AtomQuantifier::ZeroOrOne && firstTermInGroup.m_quantifier == AtomQuantifier::OneOrMore)
                 return true;
-
-            // -(.?)+.
if (m_quantifier == AtomQuantifier::OneOrMore && firstTermInGroup.m_quantifier == AtomQuantifier::ZeroOrOne) return true; } @@ -506,7 +522,9 @@ inline bool Term::hasFixedLength() const case TermType::Group: { if (m_quantifier != AtomQuantifier::One) return false; - for (const Term& term : m_atomData.group.terms) { + if (m_atomData.group.alternatives.size() != 1) + return false; + for (const Term& term : m_atomData.group.alternatives.first()) { if (!term.hasFixedLength()) return false; } @@ -564,7 +582,7 @@ inline bool Term::isUniversalTransition() const return (m_atomData.characterSet.inverted() && !m_atomData.characterSet.bitCount()) || (!m_atomData.characterSet.inverted() && m_atomData.characterSet.bitCount() == 127 && !m_atomData.characterSet.get(0)); case TermType::Group: - return m_atomData.group.terms.size() == 1 && m_atomData.group.terms.first().isUniversalTransition(); + return m_atomData.group.alternatives.size() == 1 && m_atomData.group.alternatives.first().size() == 1 && m_atomData.group.alternatives.first().first().isUniversalTransition(); } return false; } @@ -614,25 +632,33 @@ inline void Term::generateSubgraphForAtom(NFA& nfa, ImmutableCharNFANodeBuilder& break; } case TermType::Group: { - if (m_atomData.group.terms.isEmpty()) { - // FIXME: any kind of empty term could be avoided in the parser. This case should turned into an assertion. 
- source.addEpsilonTransition(destination); - return; - } + auto generateSequence = [&](const Vector& terms, ImmutableCharNFANodeBuilder& seqSource, uint32_t seqDestination) { + if (terms.isEmpty()) { + seqSource.addEpsilonTransition(seqDestination); + return; + } + if (terms.size() == 1) { + terms.first().generateGraph(nfa, seqSource, seqDestination); + return; + } + ImmutableCharNFANodeBuilder lastTarget = terms.first().generateGraph(nfa, seqSource, ActionList()); + for (unsigned i = 1; i < terms.size() - 1; ++i) { + ImmutableCharNFANodeBuilder newNode = terms[i].generateGraph(nfa, lastTarget, ActionList()); + lastTarget = WTF::move(newNode); + } + terms.last().generateGraph(nfa, lastTarget, seqDestination); + }; - if (m_atomData.group.terms.size() == 1) { - m_atomData.group.terms.first().generateGraph(nfa, source, destination); - return; + if (m_atomData.group.alternatives.size() == 1) { + generateSequence(m_atomData.group.alternatives.first(), source, destination); + break; } - ImmutableCharNFANodeBuilder lastTarget = m_atomData.group.terms.first().generateGraph(nfa, source, ActionList()); - for (unsigned i = 1; i < m_atomData.group.terms.size() - 1; ++i) { - const Term& currentTerm = m_atomData.group.terms[i]; - ImmutableCharNFANodeBuilder newNode = currentTerm.generateGraph(nfa, lastTarget, ActionList()); - lastTarget = WTF::move(newNode); + for (const auto& alternative : m_atomData.group.alternatives) { + ImmutableCharNFANodeBuilder branchStart(nfa); + source.addEpsilonTransition(branchStart); + generateSequence(alternative, branchStart, destination); } - const Term& lastTerm = m_atomData.group.terms.last(); - lastTerm.generateGraph(nfa, lastTarget, destination); break; } } @@ -658,8 +684,10 @@ inline size_t Term::memoryUsed() const { size_t extraMemory = 0; if (m_termType == TermType::Group) { - for (const Term& term : m_atomData.group.terms) - extraMemory += term.memoryUsed(); + for (const auto& alternative : m_atomData.group.alternatives) { + for (const 
Term& term : alternative) + extraMemory += term.memoryUsed(); + } } return sizeof(Term) + extraMemory; } diff --git a/Source/WebCore/contentextensions/URLFilterParser.cpp b/Source/WebCore/contentextensions/URLFilterParser.cpp index 20d294849fb2..ffc065917b0e 100644 --- a/Source/WebCore/contentextensions/URLFilterParser.cpp +++ b/Source/WebCore/contentextensions/URLFilterParser.cpp @@ -51,7 +51,13 @@ class PatternParser { if (hasError()) return; - sinkFloatingTermIfNecessary(); + if (m_hasTopLevelDisjunction) { + sinkFloatingTermIfNecessary(); + m_floatingTerm = m_openGroups.takeLast(); + sinkFloatingTermIfNecessary(); + m_hasTopLevelDisjunction = false; + } else + sinkFloatingTermIfNecessary(); simplifySunkTerms(); @@ -101,10 +107,41 @@ class PatternParser { sinkFloatingTermIfNecessary(); ASSERT(!m_floatingTerm.isValid()); - if (builtInCharacterClassID == JSC::Yarr::BuiltInCharacterClassID::DotClassID && !inverted) + if (builtInCharacterClassID == JSC::Yarr::BuiltInCharacterClassID::DotClassID && !inverted) { m_floatingTerm = Term(Term::UniversalTransition); - else - fail(URLFilterParser::UnsupportedCharacterClass); + return; + } + + if (builtInCharacterClassID == JSC::Yarr::BuiltInCharacterClassID::DigitClassID) { + m_floatingTerm = Term(Term::CharacterSetTerm, inverted); + for (unsigned i = '0'; i <= '9'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + return; + } + + if (builtInCharacterClassID == JSC::Yarr::BuiltInCharacterClassID::WordClassID) { + m_floatingTerm = Term(Term::CharacterSetTerm, inverted); + for (unsigned i = '0'; i <= '9'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + for (unsigned i = 'a'; i <= 'z'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + for (unsigned i = 'A'; i <= 'Z'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + m_floatingTerm.addCharacter('_', true); + return; + } + + if (builtInCharacterClassID == JSC::Yarr::BuiltInCharacterClassID::SpaceClassID) { + m_floatingTerm = 
Term(Term::CharacterSetTerm, inverted);
+            m_floatingTerm.addCharacter(' ', true);
+            m_floatingTerm.addCharacter('\t', true);
+            m_floatingTerm.addCharacter('\n', true);
+            m_floatingTerm.addCharacter('\r', true);
+            m_floatingTerm.addCharacter('\f', true);
+            return;
+        }
+
+        fail(URLFilterParser::UnsupportedCharacterClass);
     }
 
     void NODELETE quantifyAtom(unsigned minimum, unsigned maximum, bool)
@@ -114,16 +151,53 @@ class PatternParser {
 
         ASSERT(m_floatingTerm.isValid());
 
-        if (!minimum && maximum == 1)
+        if (!minimum && !maximum) {
+            // {0} means match zero times — discard the term.
+            m_floatingTerm = Term();
+        } else if (!minimum && maximum == 1)
             m_floatingTerm.quantify(AtomQuantifier::ZeroOrOne);
         else if (!minimum && maximum == JSC::Yarr::quantifyInfinite)
             m_floatingTerm.quantify(AtomQuantifier::ZeroOrMore);
         else if (minimum == 1 && maximum == JSC::Yarr::quantifyInfinite)
             m_floatingTerm.quantify(AtomQuantifier::OneOrMore);
+        else if (minimum > 0 || maximum > 1)
+            expandQuantifier(minimum, maximum);
         else
             fail(URLFilterParser::InvalidQuantifier);
     }
 
+    // Rewrites a counted quantifier ({n}, {n,m}, {n,}) as a group of repeated copies of the
+    // floating term: `minimum` mandatory copies followed by either a zero-or-more copy (open
+    // upper bound) or `maximum - minimum` optional copies.
+    // NOTE(review): the expansion is linear in the bounds and is not capped here — confirm an
+    // upper limit is enforced before this point.
+    void expandQuantifier(unsigned minimum, unsigned maximum)
+    {
+        Term baseTerm = m_floatingTerm;
+        m_floatingTerm = Term();
+
+        Term group(Term::GroupTerm);
+
+        for (unsigned i = 0; i < minimum; ++i)
+            group.extendGroupSubpattern(baseTerm);
+
+        if (maximum == JSC::Yarr::quantifyInfinite) {
+            // The mandatory copies above already cover the minimum, so the open-ended tail
+            // must accept zero further repetitions; a one-or-more tail would demand minimum + 1.
+            Term zeroOrMore(baseTerm);
+            zeroOrMore.quantify(AtomQuantifier::ZeroOrMore);
+            group.extendGroupSubpattern(zeroOrMore);
+        } else {
+            for (unsigned i = minimum; i < maximum; ++i) {
+                Term optional(baseTerm);
+                optional.quantify(AtomQuantifier::ZeroOrOne);
+                group.extendGroupSubpattern(optional);
+            }
+        }
+
+        m_floatingTerm = group;
+    }
+
     void NODELETE atomBackReference(unsigned)
     {
         fail(URLFilterParser::BackReference);
@@ -163,9 +237,24 @@ class PatternParser {
         m_floatingTerm = Term(Term::EndOfLineAssertionTerm);
     }
 
-    void NODELETE assertionWordBoundary(bool)
+    void assertionWordBoundary(bool inverted)
     {
-        fail(URLFilterParser::WordBoundary);
+        
if (hasError()) + return; + + sinkFloatingTermIfNecessary(); + ASSERT(!m_floatingTerm.isValid()); + + // Approximate \b by matching a non-word character. + // \B (inverted) matches a word character. + m_floatingTerm = Term(Term::CharacterSetTerm, !inverted); + for (unsigned i = '0'; i <= '9'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + for (unsigned i = 'a'; i <= 'z'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + for (unsigned i = 'A'; i <= 'Z'; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + m_floatingTerm.addCharacter('_', true); } void atomCharacterClassBegin(bool inverted = false) @@ -231,9 +313,45 @@ class PatternParser { // Nothing to do here. The character set atom may have a quantifier, we sink the atom lazily. } - void NODELETE atomCharacterClassBuiltIn(JSC::Yarr::BuiltInCharacterClassID, bool) + void atomCharacterClassBuiltIn(JSC::Yarr::BuiltInCharacterClassID builtInCharacterClassID, bool inverted) { - fail(URLFilterParser::AtomCharacter); + if (hasError()) + return; + + auto addChars = [&](std::initializer_list> ranges) { + if (!inverted) { + for (auto [lo, hi] : ranges) { + for (unsigned i = lo; i <= hi; ++i) + m_floatingTerm.addCharacter(static_cast(i), true); + } + } else { + // Build a set of characters in the class, then add everything NOT in it. 
+ std::array inClass { }; + for (auto [lo, hi] : ranges) { + for (unsigned i = lo; i <= hi; ++i) + inClass[i] = true; + } + for (unsigned i = 1; i < 128; ++i) { + if (!inClass[i]) + m_floatingTerm.addCharacter(static_cast(i), true); + } + } + }; + + switch (builtInCharacterClassID) { + case JSC::Yarr::BuiltInCharacterClassID::DigitClassID: + addChars({ {'0', '9'} }); + break; + case JSC::Yarr::BuiltInCharacterClassID::WordClassID: + addChars({ {'0', '9'}, {'a', 'z'}, {'A', 'Z'}, {'_', '_'} }); + break; + case JSC::Yarr::BuiltInCharacterClassID::SpaceClassID: + addChars({ {' ', ' '}, {'\t', '\t'}, {'\n', '\n'}, {'\r', '\r'}, {'\f', '\f'} }); + break; + default: + fail(URLFilterParser::AtomCharacter); + break; + } } void atomParenthesesSubpatternBegin(bool = true, std::optional = std::nullopt) @@ -267,9 +385,25 @@ class PatternParser { m_floatingTerm = m_openGroups.takeLast(); } - void NODELETE disjunction(JSC::Yarr::CreateDisjunctionPurpose) + void disjunction(JSC::Yarr::CreateDisjunctionPurpose) { - fail(URLFilterParser::Disjunction); + if (hasError()) + return; + + if (m_openGroups.isEmpty()) { + sinkFloatingTermIfNecessary(); + + Term implicitGroup(Term::GroupTerm); + for (const auto& term : m_sunkTerms) + implicitGroup.extendGroupSubpattern(term); + m_sunkTerms.clear(); + + m_openGroups.append(WTF::move(implicitGroup)); + m_hasTopLevelDisjunction = true; + } else + sinkFloatingTermIfNecessary(); + + m_openGroups.last().startNewAlternative(); } NO_RETURN_DUE_TO_CRASH void resetForReparsing() @@ -305,7 +439,7 @@ class PatternParser { return; } - if (m_floatingTerm.isEndOfLineAssertion()) + if (m_floatingTerm.isEndOfLineAssertion() && m_openGroups.isEmpty()) m_hasProcessedEndOfLineAssertion = true; if (!m_openGroups.isEmpty()) { @@ -369,6 +503,7 @@ class PatternParser { Term m_floatingTerm; bool m_hasBeginningOfLineAssertion { false }; bool m_hasProcessedEndOfLineAssertion { false }; + bool m_hasTopLevelDisjunction { false }; URLFilterParser::ParseStatus 
m_parseStatus; }; diff --git a/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestRule.mm b/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestRule.mm index af514baca994..f644969c77c5 100644 --- a/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestRule.mm +++ b/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestRule.mm @@ -1010,6 +1010,7 @@ - (NSDictionary *)_chromeResourceTypeToWebKitResourceType @"image": @"image", @"main_frame": @"top-document", @"media": @"media", + @"object": @"other", @"other": @"other", @"ping": @"ping", @"script": @"script", diff --git a/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestTranslator.mm b/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestTranslator.mm index 2d5f09dd4e09..2282b86667ad 100644 --- a/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestTranslator.mm +++ b/Source/WebKit/UIProcess/Extensions/Cocoa/_WKWebExtensionDeclarativeNetRequestTranslator.mm @@ -50,6 +50,16 @@ @implementation _WKWebExtensionDeclarativeNetRequestTranslator NSString *errorString; _WKWebExtensionDeclarativeNetRequestRule *rule = [[_WKWebExtensionDeclarativeNetRequestRule alloc] initWithDictionary:ruleJSON rulesetID:rulesetID errorString:&errorString]; + if (!rule) { + if (errorString) { + totalErrorCount++; + + if (errorStrings.count < maximumNumberOfDeclarativeNetRequestErrorsToSurface) + [errorStrings addObject:errorString]; + } + continue; + } + if (!rulesetIDsToRuleIDs[rulesetID]) rulesetIDsToRuleIDs[rulesetID] = [NSMutableSet set]; @@ -61,20 +71,12 @@ @implementation _WKWebExtensionDeclarativeNetRequestTranslator } [rulesetIDsToRuleIDs[rulesetID] addObject:@(rule.ruleID)]; - - if (rule) - [allValidatedRules addObject:rule]; - else if (errorString) { - totalErrorCount++; - - if (errorStrings.count < maximumNumberOfDeclarativeNetRequestErrorsToSurface) - 
[errorStrings addObject:errorString];
-            }
+            [allValidatedRules addObject:rule];
         }
     }
 
     if (totalErrorCount > maximumNumberOfDeclarativeNetRequestErrorsToSurface)
-        [errorStrings addObject:@"Error limit hit. No longer omitting errors."];
+        [errorStrings addObject:[NSString stringWithFormat:@"Too many errors (%lu total). Only the first %lu are reported.", (unsigned long)totalErrorCount, (unsigned long)maximumNumberOfDeclarativeNetRequestErrorsToSurface]];
 
     if (outErrorStrings)
         *outErrorStrings = [errorStrings copy];
diff --git a/Tools/TestWebKitAPI/Resources/cocoa/ghostery-dnr-rulesets.zip b/Tools/TestWebKitAPI/Resources/cocoa/ghostery-dnr-rulesets.zip
new file mode 100644
index 000000000000..b435fddd6581
Binary files /dev/null and b/Tools/TestWebKitAPI/Resources/cocoa/ghostery-dnr-rulesets.zip differ
diff --git a/Tools/TestWebKitAPI/Tests/WebKit/WKWebView/WKWebExtensionAPIDeclarativeNetRequest.mm b/Tools/TestWebKitAPI/Tests/WebKit/WKWebView/WKWebExtensionAPIDeclarativeNetRequest.mm
index 6830f7a3a5bc..2c73dd23e1ec 100644
--- a/Tools/TestWebKitAPI/Tests/WebKit/WKWebView/WKWebExtensionAPIDeclarativeNetRequest.mm
+++ b/Tools/TestWebKitAPI/Tests/WebKit/WKWebView/WKWebExtensionAPIDeclarativeNetRequest.mm
@@ -4016,6 +4016,455 @@ static void runRedirectRule(bool useEnhancedSecurity)
     [manager run];
 }
 
+// Helper: translate a JSON string of DNR rules through the translator pipeline.
+// Returns a dictionary with the translated rules under @"convertedRules" and the JSON
+// deserialization errors under @"jsonErrors"; translation errors go to outTranslationErrors.
+static NSDictionary *translateDNRRules(NSString *rulesJSON, NSString *rulesetID, NSArray **outTranslationErrors)
+{
+    NSData *data = [rulesJSON dataUsingEncoding:NSUTF8StringEncoding];
+    NSDictionary *jsonDataDict = @{ rulesetID: data };
+
+    NSArray *jsonErrors = nil;
+    auto *jsonObjects = [_WKWebExtensionDeclarativeNetRequestTranslator jsonObjectsFromData:jsonDataDict errorStrings:&jsonErrors];
+
+    auto *convertedRules = [_WKWebExtensionDeclarativeNetRequestTranslator translateRules:jsonObjects errorStrings:outTranslationErrors];
+
+    return @{
+        @"convertedRules": convertedRules ?: @[],
+        @"jsonErrors": jsonErrors ?: @[],
+    };
+}
+
+// Helper: extract a JSON file from ghostery-dnr-rulesets.zip bundled in test resources.
+// Returns nil when the archive is not bundled, unzip cannot be launched or exits with a
+// non-zero status, or the extracted file cannot be read.
+// NOTE(review): NSTask is a macOS-only API — confirm these tests are never built for other platforms.
+static NSData *ghosteryRulesetData(NSString *jsonFilename)
+{
+    NSString *zipPath = [NSBundle.test_resourcesBundle pathForResource:@"ghostery-dnr-rulesets" ofType:@"zip"];
+    if (!zipPath)
+        return nil;
+
+    NSString *tempDir = [NSTemporaryDirectory() stringByAppendingPathComponent:[[NSUUID UUID] UUIDString]];
+    NSTask *task = [[NSTask alloc] init];
+    task.executableURL = [NSURL fileURLWithPath:@"/usr/bin/unzip"];
+    task.arguments = @[@"-o", zipPath, jsonFilename, @"-d", tempDir];
+    // Discard unzip's chatter explicitly through the null device.
+    task.standardOutput = NSFileHandle.fileHandleWithNullDevice;
+    task.standardError = NSFileHandle.fileHandleWithNullDevice;
+
+    NSData *data = nil;
+    // -launchAndReturnError: reports a launch failure through its return value, whereas the
+    // deprecated -launch raises an exception that would abort the whole test run.
+    if ([task launchAndReturnError:nil]) {
+        [task waitUntilExit];
+        if (!task.terminationStatus)
+            data = [NSData dataWithContentsOfFile:[tempDir stringByAppendingPathComponent:jsonFilename]];
+    }
+
+    [[NSFileManager defaultManager] removeItemAtPath:tempDir error:nil];
+    return data;
+}
+
+// MARK: - regexFilter: bounded quantifiers {n}, {n,m}, {n,}, {0}
+
+TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterQuantifiers)
+{
+    auto *rules = @"["
+        "{ \"id\": 1, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"ad[0-9]{2}\\\\.js\" } },"
+        "{ \"id\": 2, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"tracker-[a-z]{3,8}\\\\.com\" } },"
+        "{ \"id\": 3, \"priority\": 1, \"action\": 
{ \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"pixel-[0-9]{1,}\\\\.gif\" } }," + "{ \"id\": 4, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"v{0}test\" } }" + "]"; + + NSArray *errors = nil; + NSDictionary *result = translateDNRRules(rules, @"test", &errors); + NSArray *converted = result[@"convertedRules"]; + + EXPECT_EQ(errors.count, 0u); + EXPECT_EQ([converted count], 4u); +} + +// MARK: - regexFilter: top-level alternation (?:a)|(?:b) + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterTopLevelAlternation) +{ + auto *rules = @"[" + "{ \"id\": 1, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"(?:\\\\/frontend-gtag\\\\.js)|(?:\\\\/gtag\\\\.min\\\\.js)\" } }" + "]"; + + NSArray *errors = nil; + NSDictionary *result = translateDNRRules(rules, @"test", &errors); + NSArray *converted = result[@"convertedRules"]; + + EXPECT_EQ(errors.count, 0u); + EXPECT_EQ([converted count], 1u); +} + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterQuantifiersBlocking) +{ + TestWebKitAPI::HTTPServer server({ + { "/"_s, { { { "Content-Type"_s, "text/html"_s } }, ""_s } }, + { "/ad99.html"_s, { { { "Content-Type"_s, "text/html"_s } }, ""_s } }, + }, TestWebKitAPI::HTTPServer::Protocol::Http); + + auto *backgroundScript = Util::constructScript(@[ + @"browser.test.sendMessage('Load Tab')" + ]); + + auto *manifest = @{ + @"manifest_version": @3, + @"permissions": @[ @"declarativeNetRequest" ], + @"background": @{ @"scripts": @[ @"background.js" ], @"type": @"module", @"persistent": @NO }, + @"declarative_net_request": @{ + @"rule_resources": @[ + @{ + @"id": @"blockAds", + @"enabled": @YES, + @"path": @"rules.json" + } + ] + } + }; + + // {2} quantifier: should block URLs containing "ad" followed by exactly 2 digits + auto *rules = @"[ { \"id\" : 1, \"priority\": 1, \"action\" : { \"type\" : \"block\" }, \"condition\" : { \"regexFilter\" : \"ad[0-9]{2}\" } } ]"; + + auto 
manager = Util::loadExtension(manifest, @{ @"background.js": backgroundScript, @"rules.json": rules }); + + [manager.get().context setPermissionStatus:WKWebExtensionContextPermissionStatusGrantedExplicitly forPermission:WKWebExtensionPermissionDeclarativeNetRequest]; + + [manager runUntilTestMessage:@"Load Tab"]; + + auto webView = manager.get().defaultTab.webView; + auto navigationDelegate = adoptNS([TestNavigationDelegate new]); + + __block bool receivedActionNotification { false }; + navigationDelegate.get().contentRuleListPerformedAction = ^(WKWebView *, NSString *identifier, _WKContentRuleListAction *action, NSURL *url) { + receivedActionNotification = true; + }; + + webView.navigationDelegate = navigationDelegate.get(); + + [webView loadRequest:server.requestWithLocalhost()]; + + Util::run(&receivedActionNotification); +} + +// MARK: - Bug: regexFilter rejects non-capturing groups (?:...) +// Non-capturing groups are a standard regex feature that doesn't require backtracking +// and can be compiled to a DFA. Chrome and Firefox DNR support them. + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterNonCapturingGroups) +{ + auto *rules = @"[" + "{ \"id\": 1, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"(?:ads|tracking)\\\\.example\\\\.com\" } }," + "{ \"id\": 2, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"tracker(?:vn)?\\\\.com\" } }" + "]"; + + NSArray *errors = nil; + NSDictionary *result = translateDNRRules(rules, @"test", &errors); + NSArray *converted = result[@"convertedRules"]; + + EXPECT_EQ(errors.count, 0u); + EXPECT_EQ([converted count], 2u); +} + +// MARK: - Bug: regexFilter rejects word boundaries \b +// Word boundaries are supported by Chrome and Firefox DNR. 
+ +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterWordBoundary) +{ + auto *rules = @"[" + "{ \"id\": 1, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"\\\\.workers\\\\.dev/help/[0-9]+\\\\b\" } }" + "]"; + + NSArray *errors = nil; + NSDictionary *result = translateDNRRules(rules, @"test", &errors); + NSArray *converted = result[@"convertedRules"]; + + EXPECT_EQ(errors.count, 0u); + EXPECT_EQ([converted count], 1u); +} + +// MARK: - Bug fix: translator no longer emits spurious "duplicate rule id 0" for invalid rules + +TEST(WKWebExtensionAPIDeclarativeNetRequest, InvalidRuleShouldNotCauseDuplicateIDError) +{ + // Use backreferences which are genuinely unsupported (requires backtracking). + auto *rules = @"[" + "{ \"id\": 100, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"(test)\\\\1\" } }," + "{ \"id\": 200, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"(other)\\\\1\" } }" + "]"; + + NSArray *errors = nil; + translateDNRRules(rules, @"test", &errors); + + for (NSString *error in errors) + EXPECT_FALSE([error containsString:@"duplicates the rule id"]); +} + +// MARK: - regexFilter: character class shorthands inside [...] 
brackets + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterCharacterClassShorthands) +{ + auto *rules = @"[" + "{ \"id\": 1, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"ad[\\\\d]+\\\\.js\" } }," + "{ \"id\": 2, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"[\\\\w\\\\W]{30,}\" } }," + "{ \"id\": 3, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"id[^\\\\d]?$\" } }" + "]"; + + NSArray *errors = nil; + NSDictionary *result = translateDNRRules(rules, @"test", &errors); + NSArray *converted = result[@"convertedRules"]; + + EXPECT_EQ(errors.count, 0u); + EXPECT_EQ([converted count], 3u); +} + +// MARK: - regexFilter: $ end-of-line assertion inside alternation groups + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterEndOfLineInAlternation) +{ + auto *rules = @"[" + "{ \"id\": 1, \"priority\": 1, \"action\": { \"type\": \"block\" }, \"condition\": { \"regexFilter\": \"example\\\\.top/l(?:/|$)\" } }" + "]"; + + NSArray *errors = nil; + NSDictionary *result = translateDNRRules(rules, @"test", &errors); + NSArray *converted = result[@"convertedRules"]; + + EXPECT_EQ(errors.count, 0u); + EXPECT_EQ([converted count], 1u); +} + +// MARK: - regexFilter: end-to-end blocking test with alternation + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterAlternationBlocking) +{ + TestWebKitAPI::HTTPServer server({ + { "/"_s, { { { "Content-Type"_s, "text/html"_s } }, ""_s } }, + { "/tracking.html"_s, { { { "Content-Type"_s, "text/html"_s } }, ""_s } }, + }, TestWebKitAPI::HTTPServer::Protocol::Http); + + auto *backgroundScript = Util::constructScript(@[ + @"browser.test.sendMessage('Load Tab')" + ]); + + auto *manifest = @{ + @"manifest_version": @3, + @"permissions": @[ @"declarativeNetRequest" ], + @"background": @{ @"scripts": @[ @"background.js" ], @"type": @"module", @"persistent": @NO }, + @"declarative_net_request": @{ + 
@"rule_resources": @[ + @{ @"id": @"blockAds", @"enabled": @YES, @"path": @"rules.json" } + ] + } + }; + + auto *rules = @"[ { \"id\" : 1, \"priority\": 1, \"action\" : { \"type\" : \"block\" }, \"condition\" : { \"regexFilter\" : \"(?:tracking|analytics)\\\\.html\" } } ]"; + + auto manager = Util::loadExtension(manifest, @{ @"background.js": backgroundScript, @"rules.json": rules }); + [manager.get().context setPermissionStatus:WKWebExtensionContextPermissionStatusGrantedExplicitly forPermission:WKWebExtensionPermissionDeclarativeNetRequest]; + [manager runUntilTestMessage:@"Load Tab"]; + + auto webView = manager.get().defaultTab.webView; + auto navigationDelegate = adoptNS([TestNavigationDelegate new]); + + __block bool receivedActionNotification { false }; + navigationDelegate.get().contentRuleListPerformedAction = ^(WKWebView *, NSString *identifier, _WKContentRuleListAction *action, NSURL *url) { + receivedActionNotification = true; + }; + + webView.navigationDelegate = navigationDelegate.get(); + [webView loadRequest:server.requestWithLocalhost()]; + + Util::run(&receivedActionNotification); +} + +// MARK: - regexFilter: end-to-end blocking test with \d shorthand + +TEST(WKWebExtensionAPIDeclarativeNetRequest, RegexFilterDigitShorthandBlocking) +{ + TestWebKitAPI::HTTPServer server({ + { "/"_s, { { { "Content-Type"_s, "text/html"_s } }, ""_s } }, + { "/tracker42.html"_s, { { { "Content-Type"_s, "text/html"_s } }, ""_s } }, + }, TestWebKitAPI::HTTPServer::Protocol::Http); + + auto *backgroundScript = Util::constructScript(@[ + @"browser.test.sendMessage('Load Tab')" + ]); + + auto *manifest = @{ + @"manifest_version": @3, + @"permissions": @[ @"declarativeNetRequest" ], + @"background": @{ @"scripts": @[ @"background.js" ], @"type": @"module", @"persistent": @NO }, + @"declarative_net_request": @{ + @"rule_resources": @[ + @{ @"id": @"blockAds", @"enabled": @YES, @"path": @"rules.json" } + ] + } + }; + + auto *rules = @"[ { \"id\" : 1, \"priority\": 1, 
\"action\" : { \"type\" : \"block\" }, \"condition\" : { \"regexFilter\" : \"tracker\\\\d+\" } } ]"; + + auto manager = Util::loadExtension(manifest, @{ @"background.js": backgroundScript, @"rules.json": rules }); + [manager.get().context setPermissionStatus:WKWebExtensionContextPermissionStatusGrantedExplicitly forPermission:WKWebExtensionPermissionDeclarativeNetRequest]; + [manager runUntilTestMessage:@"Load Tab"]; + + auto webView = manager.get().defaultTab.webView; + auto navigationDelegate = adoptNS([TestNavigationDelegate new]); + + __block bool receivedActionNotification { false }; + navigationDelegate.get().contentRuleListPerformedAction = ^(WKWebView *, NSString *identifier, _WKContentRuleListAction *action, NSURL *url) { + receivedActionNotification = true; + }; + + webView.navigationDelegate = navigationDelegate.get(); + [webView loadRequest:server.requestWithLocalhost()]; + + Util::run(&receivedActionNotification); +} + +// MARK: - Bug: large real-world ruleset produces silent translation errors +// Tested with Ghostery's adblocking DNR ruleset (73k+ rules). +// In production Safari, these errors are silently discarded because: +// 1. jsonDeserializationErrorStrings is collected but never read +// 2. 
parsingErrorStrings is only processed behind ENABLE(DNR_ON_RULE_MATCHED_DEBUG) which defaults to 0 + +TEST(WKWebExtensionAPIDeclarativeNetRequest, GhosteryLargeRulesetTranslation) +{ + NSData *rulesData = ghosteryRulesetData(@"dnr-ads.json"); + EXPECT_NOT_NULL(rulesData); + if (!rulesData) + return; + + NSDictionary *jsonDataDict = @{ @"ghostery-ads": rulesData }; + + NSArray *jsonDeserializationErrors = nil; + auto *allJSONObjects = [_WKWebExtensionDeclarativeNetRequestTranslator jsonObjectsFromData:jsonDataDict errorStrings:&jsonDeserializationErrors]; + + NSUInteger totalRulesParsed = 0; + for (NSString *key in allJSONObjects) + totalRulesParsed += [allJSONObjects[key] count]; + NSLog(@"Ghostery DNR: %lu bytes, %lu rules parsed", (unsigned long)rulesData.length, (unsigned long)totalRulesParsed); + + NSArray *translationErrors = nil; + auto *convertedRules = [_WKWebExtensionDeclarativeNetRequestTranslator translateRules:allJSONObjects errorStrings:&translationErrors]; + + NSLog(@"Ghostery DNR: %lu translation errors, %lu content blocker rules produced", (unsigned long)translationErrors.count, (unsigned long)convertedRules.count); + for (NSString *error in translationErrors) + NSLog(@" %@", error); + + EXPECT_GT(convertedRules.count, 0u); + EXPECT_EQ(jsonDeserializationErrors.count, 0u); + EXPECT_EQ(translationErrors.count, 0u); +} + +// MARK: - Large real-world ruleset compilation performance +// Ghostery's adblocking ruleset (74k+ content blocker rules after translation) +// takes ~65 seconds to compile to DFA bytecode, exceeding typical extension loading timeouts. 

// Translates the ruleset returned by ghosteryRulesetData(@"dnr-ads.json"), serializes the
// translated rules to JSON, and compiles them through the default WKContentRuleListStore.
// Logs the elapsed compile time and expects compilation to produce a rule list.
TEST(WKWebExtensionAPIDeclarativeNetRequest, GhosteryLargeRulesetCompilation)
{
    NSData *rulesData = ghosteryRulesetData(@"dnr-ads.json");
    EXPECT_NOT_NULL(rulesData);
    if (!rulesData)
        return;

    NSDictionary *jsonDataDict = @{ @"ghostery-ads": rulesData };

    NSArray *jsonErrors = nil;
    auto *allJSONObjects = [_WKWebExtensionDeclarativeNetRequestTranslator jsonObjectsFromData:jsonDataDict errorStrings:&jsonErrors];

    NSArray *translationErrors = nil;
    auto *convertedRules = [_WKWebExtensionDeclarativeNetRequestTranslator translateRules:allJSONObjects errorStrings:&translationErrors];

    // +dataWithJSONObject:options:error: raises on a nil top-level object, so fail the test
    // cleanly here instead of crashing the runner.
    EXPECT_NOT_NULL(convertedRules);
    if (!convertedRules)
        return;

    // Per Cocoa convention the return value, not the error pointer, signals failure.
    NSError *jsonSerializationError = nil;
    NSData *jsonData = [NSJSONSerialization dataWithJSONObject:convertedRules options:0 error:&jsonSerializationError];
    EXPECT_NOT_NULL(jsonData);
    if (!jsonData) {
        NSLog(@"Ghostery DNR: JSON serialization failed: %@", jsonSerializationError);
        return;
    }

    NSString *jsonString = [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding];
    // Report the encoded byte count; NSString's length would count UTF-16 code units instead.
    NSLog(@"Ghostery DNR: compiling %lu content blocker rules (%lu bytes)", (unsigned long)convertedRules.count, (unsigned long)jsonData.length);

    __block bool compilationDone = false;
    __block bool compilationSucceeded = false;

    auto startTime = [NSDate date];

    [[WKContentRuleListStore defaultStore] compileContentRuleListForIdentifier:@"ghostery-ads-test" encodedContentRuleList:jsonString completionHandler:^(WKContentRuleList *ruleList, NSError *error) {
        // timeIntervalSinceNow is negative for a date in the past, hence the negation.
        auto elapsed = -[startTime timeIntervalSinceNow];
        NSLog(@"Ghostery DNR: compilation %@ in %.1f seconds%@", ruleList ? @"succeeded" : @"FAILED", elapsed, error ? [NSString stringWithFormat:@": %@", error] : @"");

        compilationSucceeded = !!ruleList;
        compilationDone = true;
    }];

    TestWebKitAPI::Util::run(&compilationDone);

    EXPECT_TRUE(compilationSucceeded);

    // Wait for the removal to finish so the test does not return with cleanup of the
    // shared default store still in flight.
    __block bool removalDone = false;
    [[WKContentRuleListStore defaultStore] removeContentRuleListForIdentifier:@"ghostery-ads-test" completionHandler:^(NSError *error) {
        removalDone = true;
    }];
    TestWebKitAPI::Util::run(&removalDone);
}

// MARK: - Bug: enabling multiple rulesets silently fails when merged total exceeds
// ContentExtensionParser's hardcoded 150,000 rule limit (ContentExtensionParser.cpp:334).
// WebKit merges all enabled rulesets from an extension into a single content blocker
// before compilation, so toggling additional rulesets on can push the combined total
// past the limit, causing compileContentRuleListFile to fail with JSONTooManyRules.
// The error propagates only as RELEASE_LOG_ERROR; the extension sees nothing.

// Translates three Ghostery rulesets together, compiles the combined result, and expects a
// compilation error exactly when the translated rule count exceeds the limit.
TEST(WKWebExtensionAPIDeclarativeNetRequest, GhosteryCombinedRulesetsExceed150kLimit)
{
    // Rule-count limit quoted in the comment above; selects which outcome is expected.
    static constexpr NSUInteger maximumRuleCount = 150000;

    NSData *ads = ghosteryRulesetData(@"dnr-ads.json");
    NSData *tracking = ghosteryRulesetData(@"dnr-tracking.json");
    NSData *annoyances = ghosteryRulesetData(@"dnr-annoyances.json");
    EXPECT_NOT_NULL(ads);
    EXPECT_NOT_NULL(tracking);
    EXPECT_NOT_NULL(annoyances);
    if (!ads || !tracking || !annoyances)
        return;

    NSDictionary *jsonDataDict = @{
        @"ads": ads,
        @"tracking": tracking,
        @"annoyances": annoyances,
    };

    NSArray *jsonErrors = nil;
    auto *allJSONObjects = [_WKWebExtensionDeclarativeNetRequestTranslator jsonObjectsFromData:jsonDataDict errorStrings:&jsonErrors];

    NSArray *translationErrors = nil;
    auto *convertedRules = [_WKWebExtensionDeclarativeNetRequestTranslator translateRules:allJSONObjects errorStrings:&translationErrors];

    // +dataWithJSONObject:options:error: raises on a nil top-level object.
    EXPECT_NOT_NULL(convertedRules);
    if (!convertedRules)
        return;

    NSLog(@"Ghostery combined: %lu translated rules (limit is 150000)", (unsigned long)convertedRules.count);

    // A serialization failure would otherwise look like a compilation failure and could
    // satisfy the over-limit expectation for the wrong reason, so check it explicitly.
    NSError *jsonSerializationError = nil;
    NSData *jsonData = [NSJSONSerialization dataWithJSONObject:convertedRules options:0 error:&jsonSerializationError];
    EXPECT_NOT_NULL(jsonData);
    if (!jsonData) {
        NSLog(@"Ghostery combined: JSON serialization failed: %@", jsonSerializationError);
        return;
    }

    NSString *jsonString = [[NSString alloc] initWithData:jsonData encoding:NSUTF8StringEncoding];

    __block bool done = false;
    __block NSError *compilationError = nil;

    [[WKContentRuleListStore defaultStore] compileContentRuleListForIdentifier:@"ghostery-combined-test" encodedContentRuleList:jsonString completionHandler:^(WKContentRuleList *ruleList, NSError *error) {
        compilationError = error;
        done = true;
    }];

    TestWebKitAPI::Util::run(&done);

    if (convertedRules.count > maximumRuleCount) {
        NSLog(@"Ghostery combined: compilation failed as expected: %@", compilationError);
        EXPECT_NOT_NULL(compilationError);
    } else {
        NSLog(@"Ghostery combined: under 150k limit — compilation should succeed");
        EXPECT_NULL(compilationError);
    }

    // Wait for the removal to finish so the test does not return with cleanup of the
    // shared default store still in flight.
    __block bool removalDone = false;
    [[WKContentRuleListStore defaultStore] removeContentRuleListForIdentifier:@"ghostery-combined-test" completionHandler:^(NSError *error) {
        removalDone = true;
    }];
    TestWebKitAPI::Util::run(&removalDone);
}

} // namespace TestWebKitAPI

#endif // ENABLE(WK_WEB_EXTENSIONS)