Replies: 2 comments
-
I like that idea! |
Beta Was this translation helpful? Give feedback.
0 replies
-
In case anyone is interested, I've written a source scanner that looks for keywords such as malloc and goto: https://briankhuu.com/blog/2025/01/26/clibs-source-scanner-for-keyword-suggestion/
Maybe we could integrate something like this as a tag suggester. For example, running:
tcc -run ./source_keyword_suggester.c 2>&- << HEREDOC
#include <stdlib.h>
/* Sample input for the scanner: uses malloc, free and goto so each detector has something to find. */
int main() {
int *ptr = malloc(100); /* heap allocation call the scanner should report */
free(ptr); /* matching release, also reported */
goto end; /* keyword the scanner should report */
end:
return 0;
}
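HEREDOC
Which would output:
["heap used", "malloc", "free", "goto used"]
This is because internally it sees the following tokens (printed to stderr, one per line, with "()" marking a token immediately followed by an opening parenthesis):
include
stdlib.h
int
main()
int
ptr
malloc()
free()
ptr
goto
end
end
return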
So we could try extending it further to check whether a source is using certain includes.
C Source:
/*
Clibs Source Scanner For Keyword Suggestion
Author: Brian Khuu (2025)
The idea is to provide a mechanism for scanning source code and
giving some keyword suggestions based on the properties of that
source code. In this case, I would like to mark source code as
using dynamic memory or not. This won't be perfect, but it may help
encourage library writers to use keywords if they're already provided.
# MIT License
Copyright (c) 2025 Brian Khuu
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
*/
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
/* Maximum number of characters kept for a single token (excluding the NUL terminator). */
#define TOKEN_MAX 256

typedef struct SourceTokenScanner SourceTokenScanner;

/* Persistent state of the character-at-a-time token scanner. Zero-initialise before first use. */
struct SourceTokenScanner
{
    int prev;                     /* Previously seen character, used for two-character sequences */
    int quote_char;               /* Active quote delimiter (' or "), or 0 when outside a literal */
    bool in_single_line_comment;  /* Set while skipping a // comment */
    bool in_multi_line_comment;   /* Set while skipping a block comment */
    bool token_ready;             /* Set when `token` holds a completed token */
    bool token_is_function;       /* Set when the completed token was directly followed by '(' */
    char token[TOKEN_MAX + 1];    /* NUL-terminated text of the token being built or just completed */
    size_t token_size;            /* Number of characters currently stored in `token` */
};
typedef struct SourceTagger SourceTagger;

/* Flags recording which indicator tokens were found in the scanned source. */
struct SourceTagger
{
    bool uses_malloc;   /* A "malloc" function token was seen */
    bool uses_free;     /* A "free" function token was seen */
    bool uses_realloc;  /* A "realloc" function token was seen */
    bool uses_goto;     /* A "goto" keyword token was seen */
};
/**
 * Feed one character of C source text into the heuristic token scanner.
 *
 * The scanner skips comments and string/character literals and collects runs
 * of [A-Za-z0-9_.] into context->token. A token is reported when the first
 * character after the run arrives, provided the run is at least three
 * characters long and does not start with a digit or '.'.
 *
 * Known limitations of the heuristic: a token is only flagged as a function
 * when '(' follows it immediately (no whitespace in between), and a run that
 * is directly followed by a quote character is discarded.
 *
 * @param context Scanner state; must be zero-initialised before the first call.
 * @param ch      Next character of the source text.
 * @return true when a completed token is available in context->token (with
 *         context->token_is_function set accordingly); the token stays valid
 *         until the next call. false otherwise.
 */
bool sourceTokenScanner(SourceTokenScanner *context, const char ch)
{
    // The token reported by the previous call has been consumed; start afresh.
    if (context->token_ready)
    {
        context->token[0] = '\0';
        context->token_size = 0;
        context->token_ready = false;
    }

    // Comment state is checked before quote detection: quote characters have
    // no meaning inside a comment (think of the apostrophe in "don't") and
    // must not start a literal there.
    if (context->in_single_line_comment)
    {
        if (ch == '\n')
        {
            context->in_single_line_comment = false;
            // Drop the stale '/' so that a following line starting with '*'
            // or '/' is not mistaken for a comment opener.
            context->prev = 0;
        }
        return false;
    }

    if (context->in_multi_line_comment)
    {
        if (context->prev == '*' && ch == '/')
        {
            context->in_multi_line_comment = false;
            context->prev = '\0';
        }
        else
        {
            context->prev = ch;
        }
        return false;
    }

    // Inside a string or character literal.
    if (context->quote_char)
    {
        if (context->prev == '\\')
        {
            // ch is the escaped character (e.g. the second backslash of "\\"
            // or the quote of "\""); it neither ends the literal nor starts
            // a new escape sequence.
            context->prev = 0;
        }
        else if (ch == context->quote_char)
        {
            context->quote_char = '\0';
            // Forget the literal's content so that '/' followed by '*' or
            // '/' after the closing quote is not read as a comment opener.
            context->prev = 0;
        }
        else
        {
            context->prev = ch;
        }
        return false;
    }

    // Start of a string or character literal; any partial token is discarded.
    if (ch == '\'' || ch == '"')
    {
        context->quote_char = ch;
        context->prev = 0;
        context->token[0] = '\0';
        context->token_size = 0;
        return false;
    }

    // Start of a single-line comment.
    if ((context->prev == '/') && (ch == '/'))
    {
        context->in_single_line_comment = true;
        context->token[0] = '\0';
        context->token_size = 0;
        return false;
    }

    // Start of a block comment. prev is cleared so that the '*' of the opener
    // cannot also serve as the '*' of a closing sequence.
    if ((context->prev == '/') && (ch == '*'))
    {
        context->in_multi_line_comment = true;
        context->prev = 0;
        context->token[0] = '\0';
        context->token_size = 0;
        return false;
    }

    context->prev = ch;

    // Accumulate token characters; anything beyond TOKEN_MAX is silently dropped.
    if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || (ch == '_') || (ch == '.'))
    {
        if (context->token_size < TOKEN_MAX)
        {
            context->token[context->token_size++] = ch;
            context->token[context->token_size] = '\0';
        }
        return false;
    }

    // A non-token character ends the current run, if there is one.
    if (context->token_size == 0)
    {
        return false;
    }

    if (context->token_size <= 2 || ('0' <= context->token[0] && context->token[0] <= '9') || (context->token[0] == '.'))
    {
        // Exclude short tokens or invalid starting characters
        // Dev Note: Shortest function name is 3 characters long (e.g. sin, cos, tan, log etc...)
        context->token[0] = '\0';
        context->token_size = 0;
        return false;
    }

    // Heuristic Token Found
    context->token_is_function = (ch == '(');
    context->token_ready = true;
    return true;
}
/**
 * Append a tag to a fixed-capacity tag list unless an equal tag is already stored.
 *
 * Only the first *tag_count entries are treated as stored tags, so the array
 * does not have to be NULL-initialised and stale entries beyond the count are
 * never mistaken for existing tags.
 *
 * @param tags      Array with room for max_tags string pointers.
 * @param tag_count In/out: number of tags currently stored; incremented when
 *                  the tag is appended.
 * @param max_tags  Capacity of tags. When the list is full the tag is dropped.
 * @param tag       NUL-terminated tag text. The pointer itself is stored (the
 *                  text is not copied), so it must outlive the list.
 */
void add_tags(char **tags, size_t *tag_count, size_t max_tags, char *tag)
{
    // Nothing sensible can be done without a list, a counter and a tag.
    if (tags == NULL || tag_count == NULL || tag == NULL)
    {
        return;
    }

    // Check if tag already added (never read past the array's capacity)
    const size_t used = (*tag_count < max_tags) ? *tag_count : max_tags;
    for (size_t i = 0; i < used; i++)
    {
        if (tags[i] != NULL && (strcmp(tags[i], tag) == 0))
        {
            return;
        }
    }

    // Add tag
    if (*tag_count < max_tags)
    {
        tags[*tag_count] = tag;
        *tag_count = *tag_count + 1;
    }
}
/**
 * Read C source text from stdin, scan it for indicator tokens (malloc, free,
 * realloc, goto) and print a JSON-style array of suggested tags to stdout.
 *
 * Every token the scanner reports is also echoed to stderr, one per line,
 * with "()" appended when the token was directly followed by '('.
 *
 * @return Always 0.
 */
int main(void)
{
    SourceTokenScanner sourceTokenScannerState = {0};
    SourceTagger sourceTagger = {0};

    /* Scan Source For Indicator Tokens */
    // getchar() returns an int so that EOF is distinguishable from every byte
    // value. Storing it in a char would loop forever where char is unsigned
    // and would stop early at byte 0xFF where char is signed.
    int ch;
    while ((ch = getchar()) != EOF)
    {
        if (!sourceTokenScanner(&sourceTokenScannerState, (char)ch))
        {
            continue;
        }

        const char *token = sourceTokenScannerState.token;
        if (sourceTokenScannerState.token_is_function)
        {
            // Function
            if (strcmp(token, "malloc") == 0)
            {
                sourceTagger.uses_malloc = true;
            }
            else if (strcmp(token, "free") == 0)
            {
                sourceTagger.uses_free = true;
            }
            else if (strcmp(token, "realloc") == 0)
            {
                sourceTagger.uses_realloc = true;
            }
        }
        else
        {
            if (strcmp(token, "goto") == 0)
            {
                sourceTagger.uses_goto = true;
            }
        }

        // Debug trace of every recognised token
        fprintf(stderr, "%s%s\n", token, sourceTokenScannerState.token_is_function ? "()" : "");
    }

    /* Generate Tag Suggestions */
    char *tags[100] = {NULL};
    const size_t max_tags = sizeof(tags) / sizeof(tags[0]);
    size_t tag_count = 0;

    if (!sourceTagger.uses_malloc && !sourceTagger.uses_free && !sourceTagger.uses_realloc)
    {
        add_tags(tags, &tag_count, max_tags, "no heap");
        add_tags(tags, &tag_count, max_tags, "heapless");
        add_tags(tags, &tag_count, max_tags, "no malloc");
    }

    // "heap used" is requested by each branch below; add_tags drops duplicates.
    if (sourceTagger.uses_malloc)
    {
        add_tags(tags, &tag_count, max_tags, "heap used");
        add_tags(tags, &tag_count, max_tags, "malloc");
    }

    if (sourceTagger.uses_free)
    {
        add_tags(tags, &tag_count, max_tags, "heap used");
        add_tags(tags, &tag_count, max_tags, "free");
    }

    if (sourceTagger.uses_realloc)
    {
        add_tags(tags, &tag_count, max_tags, "heap used");
        add_tags(tags, &tag_count, max_tags, "realloc");
    }

    if (sourceTagger.uses_goto)
    {
        add_tags(tags, &tag_count, max_tags, "goto used");
    }

    /* Print Tag Suggestions */
    printf("[");
    for (size_t i = 0; i < tag_count; i++)
    {
        if (i > 0)
        {
            printf(", ");
        }
        printf("\"%s\"", tags[i]);
    }
    printf("]");

    return 0;
}
Beta Was this translation helpful? Give feedback.
0 replies
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
-
It would be handy to filter packages based on whether they use the heap.
This could be done either by adding a tag field to clib.json on the maintainer side or by adding tags to https://github.com/clibs/clib/wiki/Packages
Would it also be possible to scan the source code for calls to 'malloc()'? That is certainly not a guarantee, but it would allow for adding a 'heap required' tag.
Beta Was this translation helpful? Give feedback.
All reactions