parent
89f3345746
commit
70bac99083
1
.gitignore
vendored
1
.gitignore
vendored
@ -7,4 +7,5 @@ __pycache__
|
|||||||
target
|
target
|
||||||
isspam
|
isspam
|
||||||
risspam
|
risspam
|
||||||
|
isspam_cpp
|
||||||
.build-trigger-2014-12-02 15:26
|
.build-trigger-2014-12-02 15:26
|
||||||
|
9
Makefile
9
Makefile
@ -1,20 +1,20 @@
|
|||||||
CC = gcc
|
CC = gcc
|
||||||
CFLAGS = -Ofast
|
CFLAGS = -Ofast
|
||||||
|
|
||||||
all: build run valgrind build_risspam run_risspam
|
all: build run valgrind build_risspam run_risspam build_cpp
|
||||||
|
|
||||||
build:
|
build:
|
||||||
@echo "Compiling retoor_c project.".
|
@echo "Compiling retoor_c project.".
|
||||||
@# removed -pedantic flag because it doesn't accept ' for formatting numbers
|
|
||||||
@# using printf
|
|
||||||
@$(CC) $(CFLAGS) retoor_c/isspam.c -o isspam
|
@$(CC) $(CFLAGS) retoor_c/isspam.c -o isspam
|
||||||
|
|
||||||
|
build_cpp:
|
||||||
|
@echo "Compiling C++ version of isspam."
|
||||||
|
@g++ -Ofast retoor_c/isspam.cpp -o isspam_cpp
|
||||||
|
|
||||||
build_risspam:
|
build_risspam:
|
||||||
@echo "Compiling 12bitfloat_risspam project."
|
@echo "Compiling 12bitfloat_risspam project."
|
||||||
cd 12bitfloat_rust/risspam && cargo run --release && cp target/release/risspam ../../
|
cd 12bitfloat_rust/risspam && cargo run --release && cp target/release/risspam ../../
|
||||||
|
|
||||||
|
|
||||||
run: run_spam wl run_not_spam
|
run: run_spam wl run_not_spam
|
||||||
run_risspam: run_spam_risspam run_not_spam_risspam
|
run_risspam: run_spam_risspam run_not_spam_risspam
|
||||||
|
|
||||||
@ -36,7 +36,6 @@ run_spam_risspam:
|
|||||||
run_not_spam_risspam:
|
run_not_spam_risspam:
|
||||||
@./risspam ./not_spam/*.txt
|
@./risspam ./not_spam/*.txt
|
||||||
|
|
||||||
|
|
||||||
valgrind: build
|
valgrind: build
|
||||||
valgrind ./isspam ./spam/*.txt
|
valgrind ./isspam ./spam/*.txt
|
||||||
|
|
||||||
|
3
bench.py
3
bench.py
@ -8,4 +8,7 @@ print("Time C:",time.time() - time_start)
|
|||||||
time_start = time.time()
|
time_start = time.time()
|
||||||
subprocess.check_output('./risspam -p books/*.txt', shell=True)
|
subprocess.check_output('./risspam -p books/*.txt', shell=True)
|
||||||
print("Time Rust:",time.time() - time_start)
|
print("Time Rust:",time.time() - time_start)
|
||||||
|
time_start = time.time()
|
||||||
|
subprocess.check_output('./isspam_cpp books/*.txt', shell=True)
|
||||||
|
print("Time CPP:",time.time() - time_start)
|
||||||
print("***end benchmark***")
|
print("***end benchmark***")
|
||||||
|
@ -98,7 +98,7 @@ void* analyze_file(void* arg) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
char *saveptr;
|
char *saveptr;
|
||||||
char* token = strtok_r(text, " .?!;:\n", &saveptr);
|
char* token = strtok_r(text, " \f\v\r\n\t", &saveptr);
|
||||||
while (token != NULL) {
|
while (token != NULL) {
|
||||||
word_count++;
|
word_count++;
|
||||||
|
|
||||||
@ -117,7 +117,7 @@ void* analyze_file(void* arg) {
|
|||||||
forbidden_count++;
|
forbidden_count++;
|
||||||
}
|
}
|
||||||
|
|
||||||
token = strtok_r(NULL, " .?!;:\n", &saveptr);
|
token = strtok_r(NULL, " \f\v\r\n\t", &saveptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
result->total_word_count = word_count;
|
result->total_word_count = word_count;
|
||||||
@ -180,4 +180,4 @@ int main(int argc, char *argv[]) {
|
|||||||
printf("Word count per sentence: %.6f\n", word_count_per_sentence);
|
printf("Word count per sentence: %.6f\n", word_count_per_sentence);
|
||||||
printf("Total files read: %d\n", (int)(argc - 1));
|
printf("Total files read: %d\n", (int)(argc - 1));
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
129
retoor_c/isspam.cpp
Normal file
129
retoor_c/isspam.cpp
Normal file
@ -0,0 +1,129 @@
|
|||||||
|
// Author: retoor@molodetz.nl
|
||||||
|
|
||||||
|
#include <algorithm>
#include <atomic>
#include <cctype>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <thread>
#include <unordered_set>
#include <vector>
|
||||||
|
|
||||||
|
#define FORBIDDEN_WORDS_COUNT 40
|
||||||
|
|
||||||
|
// Tokens that count towards the "forbidden" tally. Lookup is an exact,
// case-sensitive match against a whole whitespace-delimited token.
// NOTE(review): "expers" looks like a typo for "experts"; it is kept verbatim
// so the counts do not change — confirm against the other implementations.
const std::unordered_set<std::string> forbidden_words{
    // URL / address fragments
    "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
    "@",
    // crypto and messaging vocabulary
    "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email",
    "cryptocurrency",
    // typical scam-pitch wording
    "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers",
    "hire", "century",
    "transaction", "essential", "managing", "contact", "contacting",
    "understanding", "assets", "funds",
};
|
||||||
|
|
||||||
|
// Per-file tallies. `filename` is set by the caller; analyze_file() fills in
// the counters. Every counter starts at zero.
struct AnalysisResult {
    std::string filename;                  // path of the file to analyse
    long long total_word_count{0};         // whitespace-delimited tokens
    long long total_capitalized_count{0};  // tokens starting with an uppercase letter
    long long total_sentence_count{0};     // number of '.' characters
    long long total_number_count{0};       // tokens containing at least one digit
    long long total_forbidden_count{0};    // tokens found in forbidden_words
};
|
||||||
|
|
||||||
|
/// Loads the complete contents of a file into a string.
///
/// @param filename Path of the file to read.
/// @return The file's bytes as a string. An empty string is returned when the
///         file cannot be opened (a diagnostic is written to std::cerr) and
///         also when the file itself is empty.
std::string read_file(const std::string& filename) {
    std::ifstream file(filename);
    if (!file) {
        // This function is reached from several threads at once (main() runs
        // analyze_file() concurrently). Build the whole line first and emit it
        // with a single insertion so that diagnostics from different threads
        // are far less likely to be spliced into each other. std::cerr is
        // unit-buffered, so no explicit flush is needed.
        std::cerr << ("File doesn't exist: " + filename + "\n");
        return "";
    }

    // Stream the file's buffer straight into an in-memory stream.
    std::ostringstream content;
    content << file.rdbuf();
    return content.str();
}
|
||||||
|
|
||||||
|
/// Computes the statistics for the file named by `result.filename`.
///
/// The file is loaded with read_file() and split on whitespace. For every
/// token the function counts: the token itself, whether its first character is
/// an uppercase letter, whether it contains a digit, and whether it is an exact
/// member of `forbidden_words`. Sentences are counted as the number of '.'
/// characters in the raw text.
///
/// @param result In/out. `filename` is read; the five `total_*` counters are
///               overwritten with this file's tallies. When read_file()
///               returns an empty string (unreadable or empty file) the
///               counters are left untouched.
void analyze_file(AnalysisResult& result) {
    const std::string text = read_file(result.filename);
    if (text.empty()) {
        return;
    }

    // The <cctype> classifiers have undefined behaviour when handed a negative
    // value other than EOF, which is what a plain `char` holding a non-ASCII
    // byte becomes on platforms where `char` is signed. Route every call
    // through unsigned char.
    const auto is_upper = [](char c) {
        return std::isupper(static_cast<unsigned char>(c)) != 0;
    };
    const auto is_digit = [](char c) {
        return std::isdigit(static_cast<unsigned char>(c)) != 0;
    };

    const long long sentence_count = std::count(text.begin(), text.end(), '.');

    long long word_count = 0;
    long long capitalized_count = 0;
    long long number_count = 0;
    long long forbidden_count = 0;

    std::istringstream stream(text);
    std::string token;
    while (stream >> token) {
        ++word_count;

        // operator>> never yields an empty token, so front() is safe.
        if (is_upper(token.front())) {
            ++capitalized_count;
        }

        if (std::any_of(token.begin(), token.end(), is_digit)) {
            ++number_count;
        }

        if (forbidden_words.find(token) != forbidden_words.end()) {
            ++forbidden_count;
        }
    }

    result.total_word_count = word_count;
    result.total_capitalized_count = capitalized_count;
    result.total_sentence_count = sentence_count;
    result.total_number_count = number_count;
    result.total_forbidden_count = forbidden_count;
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[]) {
|
||||||
|
if (argc < 2) {
|
||||||
|
std::cerr << "Usage: " << argv[0] << " <file1> <file2> ... <fileN>" << std::endl;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::thread> threads;
|
||||||
|
std::vector<AnalysisResult> results(argc - 1);
|
||||||
|
|
||||||
|
for (int i = 1; i < argc; i++) {
|
||||||
|
results[i - 1].filename = argv[i];
|
||||||
|
threads.emplace_back(analyze_file, std::ref(results[i - 1]));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (auto& thread : threads) {
|
||||||
|
thread.join();
|
||||||
|
}
|
||||||
|
|
||||||
|
long long total_word_count = 0;
|
||||||
|
long long total_capitalized_count = 0;
|
||||||
|
long long total_sentence_count = 0;
|
||||||
|
long long total_number_count = 0;
|
||||||
|
long long total_forbidden_count = 0;
|
||||||
|
|
||||||
|
for (const auto& result : results) {
|
||||||
|
total_word_count += result.total_word_count;
|
||||||
|
total_capitalized_count += result.total_capitalized_count;
|
||||||
|
total_sentence_count += result.total_sentence_count;
|
||||||
|
total_number_count += result.total_number_count;
|
||||||
|
total_forbidden_count += result.total_forbidden_count;
|
||||||
|
}
|
||||||
|
|
||||||
|
double capitalized_percentage = (total_word_count > 0) ? (static_cast<double>(total_capitalized_count) / total_word_count * 100.0) : 0;
|
||||||
|
double forbidden_percentage = (total_word_count > 0) ? (static_cast<double>(total_forbidden_count) / total_word_count * 100.0) : 0;
|
||||||
|
double word_count_per_sentence = (total_sentence_count > 0) ? (static_cast<double>(total_word_count) / total_sentence_count) : 0;
|
||||||
|
|
||||||
|
std::cout << "\nTotal Words: " << total_word_count << std::endl;
|
||||||
|
std::cout << "Total Capitalized words: " << total_capitalized_count << std::endl;
|
||||||
|
std::cout << "Total Sentences: " << total_sentence_count << std::endl;
|
||||||
|
std::cout << "Total Numbers: " << total_number_count << std::endl;
|
||||||
|
std::cout << "Total Forbidden words: " << total_forbidden_count << std::endl;
|
||||||
|
std::cout << "Capitalized percentage: " << capitalized_percentage << "%" << std::endl;
|
||||||
|
std::cout << "Forbidden percentage: " << forbidden_percentage << "%" << std::endl;
|
||||||
|
std::cout << "Word count per sentence: " << word_count_per_sentence << std::endl;
|
||||||
|
std::cout << "Total files read: " << (argc - 1) << std::endl;
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user