Skip to content

Commit

Permalink
Implement text string sets. (#1787)
Browse files Browse the repository at this point in the history
* Implement text string sets.

Add support for text string sets into the grammar. They look like this:

for any s in ("a", "b"): (pe.imphash() == s)

This requires changing integer_set and integer_enumeration to just be set and
enumeration, and adding a new type (YR_ENUMERATION) that tracks the type of
enumeration (integer or otherwise) and the number of items in the enumeration.

The enumeration now checks that all of its items have the same type and raises
a compiler error if they do not. For example, this is
an error:

for any s in ("a", 0): (s)

Also, fix the build when using the --with-debug-verbose option, which was
failing because of a missing assert.h include.

* Add docs and adjust constants layout.
  • Loading branch information
wxsBSD authored Sep 23, 2022
1 parent ba78a72 commit d6fc080
Show file tree
Hide file tree
Showing 10 changed files with 620 additions and 470 deletions.
12 changes: 12 additions & 0 deletions docs/writingrules.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1269,6 +1269,18 @@ occurrences, the first offset, and the length of each string respectively.
for all of ($a*) : ( @ > @b )


Starting with YARA 4.3.0 you can express conditions over text strings like this:

.. code-block:: yara

for any s in ("71b36345516e076a0663e0bea97759e4", "1e7f7edeb06de02f2c2a9319de99e033") : ( pe.imphash() == s )

It is worth remembering here that the two hashes referenced in the rule are
normal text strings, and have nothing to do with the strings section of the rule.
Inside the loop condition the result of the ``pe.imphash()`` function is compared
to each of the text strings, resulting in a more concise rule.


Using anonymous strings with ``of`` and ``for..of``
---------------------------------------------------

Expand Down
68 changes: 68 additions & 0 deletions libyara/exec.c
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,37 @@ static int iter_string_set_next(YR_ITERATOR* self, YR_VALUE_STACK* stack)
return ERROR_SUCCESS;
}

// "Next" function for iterators over a set of text strings.
//
// On every call two values are placed on the VM stack: first a flag that is
// 0 while items remain and 1 once the iterator is exhausted, then either the
// next string of the set or YR_UNDEFINED as a placeholder.
//
// Returns ERROR_EXEC_STACK_OVERFLOW when the stack cannot hold both values,
// ERROR_SUCCESS otherwise.
static int iter_text_string_set_next(YR_ITERATOR* self, YR_VALUE_STACK* stack)
{
  // Both the "exhausted" flag and the item itself need a free slot.
  if (stack->sp + 1 >= stack->capacity)
    return ERROR_EXEC_STACK_OVERFLOW;

  if (self->text_string_set_it.index < self->text_string_set_it.count)
  {
    // Items remain: flag is false, followed by the current string.
    stack->items[stack->sp].i = 0;
    stack->items[stack->sp + 1].ss =
        self->text_string_set_it.strings[self->text_string_set_it.index];

    self->text_string_set_it.index++;
  }
  else
  {
    // Index reached the item count: flag is true, followed by a
    // YR_UNDEFINED placeholder in the item slot.
    stack->items[stack->sp].i = 1;
    stack->items[stack->sp + 1].i = YR_UNDEFINED;
  }

  // Account for the two slots written above.
  stack->sp += 2;

  return ERROR_SUCCESS;
}

// Global table that contains the "next" function for different types of
// iterators. The reason for using this table is to avoid storing pointers
// in YARA's VM stack. Instead of the pointers we store an index within
Expand All @@ -374,13 +405,15 @@ static YR_ITERATOR_NEXT_FUNC iter_next_func_table[] = {
iter_int_range_next,
iter_int_enum_next,
iter_string_set_next,
iter_text_string_set_next,
};

// Indices stored in an iterator's next_func_idx field to select its "next"
// function from iter_next_func_table. Each value is expected to match the
// position of the corresponding function in that table, so the two lists
// must be kept in the same order when a new iterator type is added.
#define ITER_NEXT_ARRAY 0
#define ITER_NEXT_DICT 1
#define ITER_NEXT_INT_RANGE 2
#define ITER_NEXT_INT_ENUM 3
#define ITER_NEXT_STRING_SET 4
#define ITER_NEXT_TEXT_STRING_SET 5

int yr_execute_code(YR_SCAN_CONTEXT* context)
{
Expand Down Expand Up @@ -606,6 +639,41 @@ int yr_execute_code(YR_SCAN_CONTEXT* context)
stop = (result != ERROR_SUCCESS);
break;

// Builds an iterator over a set of text strings: reads the number of strings,
// allocates the iterator, fills it with the string pointers and hands the
// iterator back through push().
case OP_ITER_START_TEXT_STRING_SET:
YR_DEBUG_FPRINTF(
2,
stderr,
"- case OP_ITER_START_TEXT_STRING_SET: // %s()\n",
__FUNCTION__);

// r1.i is used below as the number of strings in the set.
// NOTE(review): r1.i is not checked for being undefined or negative before
// it is cast to size_t; presumably the compiler always emits a valid count
// here -- confirm.
pop(r1);

// Request room for the iterator itself plus one SIZED_STRING pointer per
// string in the set.
r3.p = yr_notebook_alloc(
it_notebook,
sizeof(YR_ITERATOR) + sizeof(SIZED_STRING*) * (size_t) r1.i);

if (r3.p == NULL)
{
result = ERROR_INSUFFICIENT_MEMORY;
}
else
{
r3.it->text_string_set_it.count = r1.i;
r3.it->text_string_set_it.index = 0;
r3.it->next_func_idx = ITER_NEXT_TEXT_STRING_SET;

// Fill the array from its last slot down to its first, so the value
// obtained by the final pop() ends up at index 0.
for (int64_t i = r1.i; i > 0; i--)
{
pop(r2);
r3.it->text_string_set_it.strings[i - 1] = r2.ss;
}

push(r3);
}

// Set the stop flag when result holds anything other than ERROR_SUCCESS.
stop = (result != ERROR_SUCCESS);
break;

case OP_ITER_NEXT:
YR_DEBUG_FPRINTF(
2, stderr, "- case OP_ITER_NEXT: // %s()\n", __FUNCTION__);
Expand Down
Loading

0 comments on commit d6fc080

Please sign in to comment.