ref: a5b08e27b75cdacc6f8405febbc55b40095fb1af
parent: 56185b12abb42c178037d10245526e812597ab6d
author: MHS <gek@katherine>
date: Sat Apr 10 08:15:02 EDT 2021
Automatic commit.
--- a/include-demo/stringutil.h
+++ b/include-demo/stringutil.h
@@ -5,8 +5,8 @@
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
-//Before we get on, "stringutil.h" is the most C-ish name for a source code file ever, amirite?
+
#ifndef STRUTIL_ALLOC
#define STRUTIL_ALLOC(s) malloc(s)
#endif
@@ -26,8 +26,8 @@
#define strcatafb strcatallocfb
#endif
-//Strcat but with malloc.
-static inline char* strcatalloc(const char* s1, const char* s2){
+/* Strcat but with malloc: the result buffer comes from STRUTIL_ALLOC and is sized for s1, s2 and the terminator. */
+static char* strcatalloc(const char* s1, const char* s2){
char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);
if(d){
strcpy(d, s1);
@@ -35,19 +35,18 @@
}
return d;
}
-//Free the first argument.
-static inline char* strcatallocf1(char* s1, const char* s2){
+/* Append s2 to s1 by resizing s1 with STRUTIL_REALLOC. s1 is always consumed: use only the returned pointer, which is NULL if the resize fails. */
+static char* strcatallocf1(char* s1, const char* s2){
char* d = STRUTIL_REALLOC(s1, strlen(s1) + strlen(s2) + 1);
- //char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);
if(d){
- //strcpy(d, s1);
+ /* d already holds the old contents of s1, so only s2 has to be appended. */
strcat(d, s2);
}
- //STRUTIL_FREE(s1);
+ else{STRUTIL_FREE(s1);} /* a failed resize leaves s1 allocated; release it so the "free the first argument" contract still holds and nothing leaks */
return d;
}
-//Free the second argument.
-static inline char* strcatallocf2(const char* s1, char* s2){
+/* Concatenate s1 and s2 into a new STRUTIL_ALLOC buffer and free the second argument. */
+static char* strcatallocf2(const char* s1, char* s2){
char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);
if(d){
strcpy(d, s1);
@@ -56,8 +55,8 @@
STRUTIL_FREE(s2);
return d;
}
-//Free both arguments
-static inline char* strcatallocfb(char* s1, char* s2){
+/* Concatenate s1 and s2 into a new STRUTIL_ALLOC buffer; per the original note this variant frees both arguments. */
+static char* strcatallocfb(char* s1, char* s2){
char* d = NULL; d = STRUTIL_ALLOC(strlen(s1) + strlen(s2) + 1);
if(d){
strcpy(d, s1);
@@ -68,8 +67,8 @@
return d;
}
-//Convert a non-null-terminated URL into a null terminated one.
-static inline char* str_null_terminated_alloc(const char* in, unsigned int len){
+/* Convert a non-null-terminated string of len bytes into a null-terminated copy. The copy comes from plain malloc (len+1 bytes), not STRUTIL_ALLOC. */
+static char* str_null_terminated_alloc(const char* in, unsigned int len){
char* d = NULL; d = malloc(len+1);
if(d){
memcpy(d,in,len);
@@ -78,22 +77,22 @@
return d;
}
-static inline unsigned int strprefix(const char *pre, const char *str)
+static unsigned int strprefix(const char *pre, const char *str) /* 1 if str begins with pre, otherwise 0 */
{
- size_t lenpre = strlen(pre),
+ unsigned long lenpre = strlen(pre),
lenstr = strlen(str);
return lenstr < lenpre ? 0 : memcmp(pre, str, lenpre) == 0;
}
-static inline unsigned int streq(const char *pre, const char *str)
+static unsigned int streq(const char *pre, const char *str) /* 1 if the two strings compare equal, otherwise 0 */
{
return strcmp(pre, str) == 0;
}
-//Someone once said sub-string search was an O(n^2) algorithm. What the hell?
-static inline long long strfind(const char* text, const char* subtext){
- long long ti = 0;
- long long si = 0;
- long long st = strlen(subtext);
+/* Single forward pass over text; returns the index where subtext starts, or -1. NOTE(review): after a mismatch si restarts at 0 while ti never backs up, so a match that overlaps a failed partial match (e.g. "aab" in "aaab") is not found. */
+static long strfind(const char* text, const char* subtext){
+ long ti = 0;
+ long si = 0;
+ long st = strlen(subtext);
for(;text[ti] != '\0';ti++){
if(text[ti] == subtext[si]) {
si++;
@@ -100,7 +99,7 @@
if(subtext[si] == '\0') return (ti - st)+1;
}else {
si = 0;
- if(text[ti] == subtext[si])si++; //Minor case.
+ if(text[ti] == subtext[si])si++; /*Minor case: the mismatching character may itself begin a new match.*/
if(subtext[si] == '\0') return (ti - st);
}
}
@@ -107,10 +106,9 @@
return -1;
}
-//Read file until terminator character is found.
-//Returns the number of characters copied.
-static inline unsigned long long read_until_terminator(FILE* f, char* buf, const unsigned long long buflen, char terminator){
- unsigned long long i = 0;
+/* Read file until terminator character is found or buflen-1 characters are stored. Returns the number of characters copied. buflen must be at least 1 because buflen-1 is computed unsigned. */
+static unsigned long read_until_terminator(FILE* f, char* buf, const unsigned long buflen, char terminator){
+ unsigned long i = 0;
char c;
for(i = 0; i < (buflen-1); i++)
{
@@ -119,37 +117,39 @@
if(c == terminator)break;
buf[i] = c;
}
- buf[buflen-1] = '\0'; //READ_UNTIL_TERMINATOR ALWAYS RETURNS A VALID STRING!
+ buf[buflen-1] = '\0';
return i;
}
-//Same as above but allocates memory to guarantee it can hold the entire thing. Grows naturally.
-static inline char* read_until_terminator_alloced(FILE* f, unsigned long long* lenout, char terminator, unsigned long long initsize){
+/* Read from f until terminator or end of input into a buffer that doubles as needed. Returns the NUL-terminated text (allocated with STRUTIL_ALLOC / STRUTIL_REALLOC) and stores its length in *lenout; returns NULL if an allocation fails. */
+static char* read_until_terminator_alloced(FILE* f, unsigned long* lenout, char terminator, unsigned long initsize){
char c;
- char* buf = STRUTIL_ALLOC(initsize);
+ char* buf; int ch; /* ch keeps fgetc()'s int result so EOF can be told apart from data */
+ unsigned long bcap = initsize ? initsize : 1; /* always leave room for the terminating NUL, even when initsize is 0 */
+ char* bufold;
+ unsigned long blen = 0;
+ buf = STRUTIL_ALLOC(bcap);
if(!buf) return NULL;
- unsigned long long bcap = initsize;
- unsigned long long blen = 0;
while(1){
- if(feof(f)){break;}
- c = fgetc(f);
+ ch = fgetc(f);
+ if(ch == EOF){break;} c = (char)ch; /* stop on end of file or read error instead of storing the EOF marker as a character */
if(c == terminator) {break;}
- if(blen == (bcap-1)) //Grow the buffer.
+ if(blen == (bcap-1)) /*Grow the buffer.*/
{
bcap<<=1;
- char* bufold = buf;
+ bufold = buf;
buf = STRUTIL_REALLOC(buf, bcap);
- if(!buf){free(bufold); return NULL;}
+ if(!buf){STRUTIL_FREE(bufold); return NULL;}
}
buf[blen++] = c;
}
- buf[blen] = '\0'; //READ_UNTIL_TERMINATOR ALWAYS RETURNS A VALID STRING!
+ buf[blen] = '\0';
*lenout = blen;
return buf;
}
-static inline void* read_file_into_alloced_buffer(FILE* f, unsigned long long* len){
+static void* read_file_into_alloced_buffer(FILE* f, unsigned long* len){
void* buf = NULL;
if(!f) return NULL;
fseek(f, 0, SEEK_END);
@@ -162,28 +162,32 @@
return buf;
}
-//GEK'S SIMPLE TEXT COMPRESSION SCHEMA
+/* strencodealloc(): tokens holds ntokens (name, definition) pairs; the encoded copy of inbuf is returned in a newly allocated string. */
/*LIMITATIONS
+* of GEK'S SIMPLE TEXT COMPRESSION SCHEMA:
* Token names must be alphabetic (a-z, A-Z)
* The token mark must be escaped with a backslash.
* Token names which are substrings of other ones must be listed later
*/
-static inline char* strencodealloc(const char* inbuf, const char** tokens, unsigned long long ntokens, char esc, char tokmark){
- unsigned long long lenin = strlen(inbuf);
- char c; unsigned long long i = 0;
- char c_str[512] = {0}; //We are going to be sprintf-ing to this buffer.
+static char* strencodealloc(const char* inbuf, const char** tokens, unsigned long ntokens, char esc, char tokmark){
+ unsigned long lenin;
+ char c; unsigned long i = 0; unsigned long j;
+ char c_str[512] = {0}; /* must start zeroed: only bytes 0 and 1 are written before it is first used as a string */
char* out = NULL;
+ /* The output begins with a two character header: esc followed by tokmark. */
+ lenin = strlen(inbuf);
c_str[0] = esc;
c_str[1] = tokmark;
out = strcatalloc(c_str, "");
c_str[0] = 0;
c_str[1] = 0;
- //Write out all the token entries. format is namelength~definition
- for(unsigned long long j = 0; j < ntokens; j++){
+ c_str[511] = 0;
+ /* Write out all the token entries. format is namelength~definition */
+ for(j = 0; j < ntokens; j++){
out = strcataf1(out, tokens[2*j]);
- //Write out the length of the token.
- snprintf(c_str, 512, "%llu", (unsigned long long)strlen(tokens[2*j+1]));
+ /* Write out the length of the token definition; the conversion must match the unsigned long argument. */
+ snprintf(c_str, 512, "%lu", (unsigned long)strlen(tokens[2*j+1]));
out = strcataf1(out, c_str);
c_str[0] = tokmark;
c_str[1] = 0;
@@ -193,25 +197,25 @@
c_str[0] = esc;
c_str[1] = 0;
out = strcataf1(out, c_str);
- //We have now created the header. Now to begin encoding the text.
- for(i=0; i<lenin; i++){
- for(unsigned long long t = 0; t < ntokens; t++) //t- the token we are processing.
- if(strprefix(tokens[t*2+1], inbuf+i)){ //Matched at least one
- unsigned long long howmany = 1;
- unsigned long long curtoklen = strlen(tokens[t*2+1]); //Length of the current token we are counting
- for(unsigned long long h=1;i+h*curtoklen < lenin;h++){
+ /*We have now created the header. Now to begin encoding the text.*/
+ for(i=0; i<lenin; i++){ unsigned long t;
+ for(t = 0; t < ntokens; t++) /*t- the token we are processing.*/
+ if(strprefix(tokens[t*2+1], inbuf+i)){ /*Matched at least one*/
+ unsigned long h, curtoklen, howmany = 1;
+ curtoklen = strlen(tokens[t*2+1]); /*Length of the current token we are counting*/
+ for(h=1;i+h*curtoklen < lenin;h++){
if(strprefix(tokens[t*2+1], inbuf+i+h*curtoklen))
{howmany++;}
else
- break; //The number of these things is limited.
+ break; /*The number of these things is limited.*/
}
- //We know what token and how many, write it to out
+ /*We know what token and how many, write it to out*/
c_str[0] = tokmark;
c_str[1] = 0;
out = strcataf1(out, c_str);
if(howmany > 1){
- snprintf(c_str, 512, "%llu", (unsigned long long)howmany);
+ snprintf(c_str, 512, "%lu", (unsigned long)howmany); /*the conversion must match the unsigned long argument*/
out = strcataf1(out, c_str);
}
out = strcataf1(out, tokens[t*2]);
@@ -218,13 +222,13 @@
i+=howmany*curtoklen;
continue;
}
- //Test if we need to escape a sequence.
+ /*A literal esc or tokmark character in the input is written with a preceding esc.*/
if(inbuf[i] == esc || inbuf[i] == tokmark){
c_str[0] = esc;
c_str[1] = 0;
out = strcataf1(out, c_str);
}
- //We were unable to find a match, just write the character out.
+ /*Write the character at position i out as a literal. NOTE(review): the continue above belongs to the token loop, so this also runs for the character that follows an emitted token.*/
c_str[0] = inbuf[i];
c_str[1] = 0;
out = strcataf1(out, c_str);
@@ -232,44 +236,47 @@
return out;
}
-static inline char* strdecodealloc(char* inbuf){
- unsigned long long lenin = strlen(inbuf);
- if(lenin < 3) {
- //puts("\nToo Short!\n");
+/* Decode text produced by strencodealloc. Returns the decoded text in a newly allocated string (whatever was decoded so far if the input is malformed), or NULL when inbuf is shorter than 3 characters. */
+static char* strdecodealloc(char* inbuf){
+ unsigned long lenin, ntokens;
+ char* out;char** tokens = NULL; char esc; char tokmark; long doescape;
+ char c; unsigned long vv,l,i = 2;
+ char c_str[2] = {0,0};
+
+ /* Encoded layout: esc, tokmark, token entries (name, decimal length, tokmark, definition), esc, body. */
+
+ lenin = strlen(inbuf);
+ ntokens = 0;
+
+ /* Reject short input before touching it any further: inbuf[1] lies past */
+ /* the terminator of an empty string, and allocating the output before */
+ /* this check would leak it on the early return. */
+ /* esc, tokmark and out are therefore set up right after the check. */
+
+ if(lenin < 3) {
+
return NULL;
}
- char esc = inbuf[0]; //The escape character is the first one.
- char tokmark = inbuf[1]; //Begin token character.
- //printf("Escape is %c, tokmark is %c\n", esc, tokmark);
- char c; unsigned long long i = 2;
- char c_str[2] = {0,0};
- //Our decoded text.
- char* out = strcatalloc("","");
- //Tokens for replacement, even is the token,
- //odd is its definition
- char** tokens = NULL;
- //unsigned long long* toklens = NULL;
- unsigned long long ntokens = 0;
-//#define {if(i <= lenin) c = inbuf[i++]; else {goto end;}} {if(i <= lenin) c = inbuf[i++]; else {goto end;}}
- //Retrieve the tokens.
- {if(i <= lenin) c = inbuf[i++]; else {goto end;}}; //has to occur before the loop.
+ esc = inbuf[0]; tokmark = inbuf[1]; /*The escape character is the first one, the token mark the second.*/
+ out = strcatalloc("",""); /*Our decoded text.*/
+ {if(i <= lenin) c = inbuf[i++]; else {goto end;}}; /*has to occur before the loop.*/
while(c != esc){ ntokens++;
tokens = STRUTIL_REALLOC(tokens, ntokens * 2 * sizeof(char*));
- //toklens = STRUTIL_REALLOC(toklens, ntokens * sizeof(unsigned long long));
- //toklens[ntokens-1] = 0;
+ /*name of token is tokens[(ntokens-1)*2] and its definition is tokens[(ntokens-1)*2+1]*/
+
tokens[(ntokens-1)*2] = strcatalloc("","");
tokens[(ntokens-1)*2+1] = strcatalloc("","");
- //name of token is tokens[(ntokens-1)*2] and its definition is tokens[(ntokens-1)*2+1]
- //Get the name of the token.
- if(!isalpha(c)) goto end; //Error! Can't have Break out.
+ /*Get the name of the token.*/
+
+ if(!isalpha(c)) goto end;
while(isalpha(c)){
c_str[0] = c;
tokens[(ntokens-1)*2] = strcatallocf1(tokens[(ntokens-1)*2], c_str);
{if(i <= lenin) c = inbuf[i++]; else {goto end;}};
}
- //The last retrieve() got us the first digit of the token length.
- //Get the length of the token
- unsigned long long l = 0;
+ /*The last retrieve got us the first digit of the token length.*/
+ /*Get the length of the token definition.*/
+ l = 0;
if(!isdigit(c)) goto end;
while(isdigit(c) && c!=tokmark){
c_str[0] = c;
@@ -277,28 +284,28 @@
l += atoi(c_str);
{if(i <= lenin) c = inbuf[i++]; else {goto end;}};
}
- //toklens[ntokens-1] = l;
- //We have the name of the token and its length, the last {if(i <= lenin) c = inbuf[i++]; else {goto end;}} got us the token character (~ in my example)
- //Now we can grab the token definition.
- for(unsigned long long vv = 0; vv < l; vv++){
+ /*We have the name of the token and its length; the character just read is the token mark.*/
+ /*Now we can grab the token definition: the next l characters.*/
+
+ for(vv = 0; vv < l; vv++){
{if(i <= lenin) c = inbuf[i++]; else {goto end;}};
c_str[0] = c;
tokens[(ntokens-1)*2+1] = strcatallocf1(tokens[(ntokens-1)*2+1], c_str);
}
{if(i <= lenin) c = inbuf[i++]; else {goto end;}};
- // printf("\nTOKEN %s IS %s, length %llu",tokens[(ntokens-1)*2] ,tokens[(ntokens-1)*2+1], l);
+ /*c is now the first character of the next entry, or esc when the token table is finished.*/
}
- //puts("\nREACHED ESCAPE CHARACTER.");
- //Now we attempt to build our string
+ /*The token table has ended (c == esc).*/
+ /*Now we attempt to build our string.*/
{if(i <= lenin) c = inbuf[i++]; else {goto end;}};
- long long doescape = 0;
+ doescape = 0;
while(i<=lenin){
if(!doescape && c==esc){
doescape=1;{if(i <= lenin) c = inbuf[i++]; else {goto end;}};continue;
}
if(!doescape && c==tokmark){
- //Handle digits prefixing a token.
- unsigned long long l = 0;
+ /*Handle digits prefixing a token.*/
+ unsigned long t,l = 0;
{if(i <= lenin) c = inbuf[i++]; else {goto end;}};
if(isdigit(c))
while(isdigit(c)){
@@ -310,14 +317,14 @@
else {l=1;}
i--;
- for(unsigned long long t = 0; t < ntokens; t++)
- if(strprefix(tokens[t*2], inbuf+i)){
- //MATCH!
- for(unsigned long long q = 0; q < l; q++)
+ for(t = 0; t < ntokens; t++) /*Look for the first token whose name starts at inbuf+i.*/
+ if(strprefix(tokens[t*2], inbuf+i)){ unsigned long q; /*MATCH! Append its definition l times.*/
+ for(q = 0; q < l; q++)
out = strcatallocf1(out, tokens[t*2+1]);
i+=strlen(tokens[t*2]);
- break; //break out of the for.
+ break; /*break out of the for: only the first matching token is used.*/
}
+ /*Fetch the next character (if any) and resume the main loop.*/
if(i<=lenin) {if(i <= lenin) c = inbuf[i++]; else {goto end;}};
continue;
}else{
@@ -328,13 +335,13 @@
}
}
end:
- if(tokens){
- for(unsigned long long j = 0; j < ntokens; j++)
+ if(tokens){unsigned long j; /*Release every token name and definition, then the table itself.*/
+ for(j = 0; j < ntokens; j++)
{STRUTIL_FREE(tokens[j*2]);STRUTIL_FREE(tokens[j*2+1]);}
STRUTIL_FREE(tokens);
}
- //if(toklens)STRUTIL_FREE(toklens);
+ /*out holds whatever was decoded before the end of the input or an error.*/
return out;
}
-//STRUTIL_H
+/*STRUTIL_H*/
#endif