Fix messed up .gitignore and add missing rust files
Some checks failed
isspam build / build (push) Failing after 2m48s
Some checks failed
isspam build / build (push) Failing after 2m48s
This commit is contained in:
parent
60cedddde8
commit
750d18bdab
18
.gitignore
vendored
18
.gitignore
vendored
@ -1,14 +1,14 @@
|
|||||||
.r_history
|
.r_history
|
||||||
.history
|
.history
|
||||||
.vscode
|
.vscode/
|
||||||
publish
|
publish/
|
||||||
books
|
books/
|
||||||
__pycache__
|
__pycache__/
|
||||||
target
|
target/
|
||||||
./isspam.py
|
./isspam.py
|
||||||
isspam
|
/isspam
|
||||||
risspam
|
/risspam
|
||||||
/jisspam
|
/jisspam
|
||||||
isspam_cpp
|
/isspam_cpp
|
||||||
|
/borded_cpp_exec
|
||||||
.build-trigger-2014-12-02 15:26
|
.build-trigger-2014-12-02 15:26
|
||||||
borded_cpp_exec
|
|
||||||
|
|||||||
5
12bitfloat_rust/risspam/.cargo/config.toml
Normal file
5
12bitfloat_rust/risspam/.cargo/config.toml
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
[build]
|
||||||
|
rustflags = [
|
||||||
|
"-Ztls-model=initial-exec",
|
||||||
|
"-Ctarget-cpu=native"
|
||||||
|
]
|
||||||
907
12bitfloat_rust/risspam/src/books.rs
Normal file
907
12bitfloat_rust/risspam/src/books.rs
Normal file
@ -0,0 +1,907 @@
|
|||||||
|
|
||||||
|
pub static FULL_BOOK_PATHS: &[&'static str] = &[
|
||||||
|
"books/0192806807.pdf - Unknown.txt",
|
||||||
|
"books/0_Deep Learning Cookbook - Practical Recipes to Get Started Quickly.txt",
|
||||||
|
"books/0_Deep Learning for Search.txt",
|
||||||
|
"books/0_Deep Learning with Python.txt",
|
||||||
|
"books/10Algorithms-08.txt",
|
||||||
|
"books/1407.7502v3.txt",
|
||||||
|
"books/1491912766_Advanced.txt",
|
||||||
|
"books/18374.txt",
|
||||||
|
"books/2014-data-science-salary-survey.txt",
|
||||||
|
"books/21 Recipes for Mining Twitter.txt",
|
||||||
|
"books/240415.txt",
|
||||||
|
"books/3dprinting.txt",
|
||||||
|
"books/9780077418182.pdf - W. Schiff.txt",
|
||||||
|
"books/A Developer’s Guide to the Semantic Web.txt",
|
||||||
|
"books/Advanced Analytics with Spark - Patterns for Learning from Data at Scale.txt",
|
||||||
|
"books/Advanced Analytics with Spark - Sandy Ryza, Uri Laserson, Sean Owen.txt",
|
||||||
|
"books/AdvancedBashScripting.txt",
|
||||||
|
"books/advanced-microservices.txt",
|
||||||
|
"books/Advanced Techniques in Web Intelligence – Part II.txt",
|
||||||
|
"books/Advanced Techniques in Web Intelligence – Part I.txt",
|
||||||
|
"books/Agile Business Intelligence.txt",
|
||||||
|
"books/Agile Data Science.txt",
|
||||||
|
"books/Agile Estimating and Planning.txt",
|
||||||
|
"books/Agile for Everybody - Creating Fast, Flexible, and Customer First Organizations.txt",
|
||||||
|
"books/Agile Methods - Large-Scale Development, Refactoring, Testing, and Estimation.txt",
|
||||||
|
"books/Agile Retrospectives - Making Good Teams Great.txt",
|
||||||
|
"books/Agile_Software_Development.txt",
|
||||||
|
"books/Agile Testing - A Practical Guide for Testers and Agile Teams.txt",
|
||||||
|
"books/Algorithmic Graph Theory and Sage.txt",
|
||||||
|
"books/Algorithms for Interviews.txt",
|
||||||
|
"books/algoritmos-programacion-Python.txt",
|
||||||
|
"books/a-little-book-of-r-for-time-series.txt",
|
||||||
|
"books/Amazon Web Services in Action.txt",
|
||||||
|
"books/[Andreas_M._Antonopoulos]_Mastering_Bitcoin_Unloc(BookZZ.org).txt",
|
||||||
|
"books/android9developmentcookbook.txt",
|
||||||
|
"books/AndroidForensics.txt",
|
||||||
|
"books/androidprogrammingforbeginners.txt",
|
||||||
|
"books/AndroidProgrammingPushingTheLimits.txt",
|
||||||
|
"books/AndroidSensorProgramming.txt",
|
||||||
|
"books/AndroidUIDesign.txt",
|
||||||
|
"books/AngualrJS Fundamentals.txt",
|
||||||
|
"books/angular-2-test-driven-development-2nd.txt",
|
||||||
|
"books/Angular2.txt",
|
||||||
|
"books/angular6forenterprise-readywebapplications.txt",
|
||||||
|
"books/Angular in Action.txt",
|
||||||
|
"books/AngularJS by Example.txt",
|
||||||
|
"books/AngularJS by Example - Unknown.txt",
|
||||||
|
"books/AngularJsNoviceToNinja.txt",
|
||||||
|
"books/AngularJS.txt",
|
||||||
|
"books/Angular.txt",
|
||||||
|
"books/angular_upandrunning.txt",
|
||||||
|
"books/AnIntroductionToGCC.txt",
|
||||||
|
"books/AnIntroductionToGNUMakeTool.txt",
|
||||||
|
"books/An Introduction to Information Retreival.txt",
|
||||||
|
"books/An Introduction to Machine Learning Interpretability.txt",
|
||||||
|
"books/antitextbookGo.txt",
|
||||||
|
"books/Anything You Want - 40 Lessons for a New Kind of Entrepreneur.txt",
|
||||||
|
"books/Apache Kafka Cookbook.txt",
|
||||||
|
"books/Apache Mesos Cookbook.txt",
|
||||||
|
"books/Apache Sqoop Cookbook.txt",
|
||||||
|
"books/ApacheTomcatCookbook.txt",
|
||||||
|
"books/API Design Cookbook.txt",
|
||||||
|
"books/api-driven-devops.txt",
|
||||||
|
"books/APIs A Strategy Guide.txt",
|
||||||
|
"books/Applied Text Analysis with Python - Enabling Language Aware Data Products with Machine Learning.txt",
|
||||||
|
"books/AprendiendoJavaScript(spanish).txt",
|
||||||
|
"books/Architecting Modern Data Platforms - A Guide To Enterprise Hadoop At Scale.txt",
|
||||||
|
"books/architectingmodernjavaeeapplications.txt",
|
||||||
|
"books/Arduino_Succinctly.txt",
|
||||||
|
"books/artificialintelligenceinthe21stcentury.txt",
|
||||||
|
"books/Art of Agile Development.txt",
|
||||||
|
"books/artofdatascience.txt",
|
||||||
|
"books/aspectos_avanzados_en_seguridad_en_redes_modulos.txt",
|
||||||
|
"books/aspnetcore2andangular5.txt",
|
||||||
|
"books/ASPNetCore.txt",
|
||||||
|
"books/aspnetmvc4_Succinctly.txt",
|
||||||
|
"books/ASP.NET_MVC_Succinctly.txt",
|
||||||
|
"books/Atomic Habits - An Easy & Proven Way to Build Good Habits & Break Bad Ones.txt",
|
||||||
|
"books/autocad2019beginningandintermediate.txt",
|
||||||
|
"books/autocad20203dmodeling.txt",
|
||||||
|
"books/autodeskrevit2020architecture.txt",
|
||||||
|
"books/Automate the Boring Stuff with Python.txt",
|
||||||
|
"books/A Workflow Approach to Stream Processing.txt",
|
||||||
|
"books/Bad Data Handbook - Cleaning Up The Data So You Can Get Back To Work.txt",
|
||||||
|
"books/bashcookbook.txt",
|
||||||
|
"books/Bash Guide for Beginners.txt",
|
||||||
|
"books/BasicsProgrammableLogicControllerPrinciples.txt",
|
||||||
|
"books/Bayesian_computation_with_R-libre.txt",
|
||||||
|
"books/Bayesian Networks and Influence Diagrams A Guide to Construction and Analysis.txt",
|
||||||
|
"books/Bayesian Reasoning and Machine Learning .txt",
|
||||||
|
"books/bdd-in-action.txt",
|
||||||
|
"books/BDD.txt",
|
||||||
|
"books/Beautiful Code.txt",
|
||||||
|
"books/Beautiful_Code.txt",
|
||||||
|
"books/Beautiful Data.txt",
|
||||||
|
"books/Beautiful Visualization.txt",
|
||||||
|
"books/become-ninja-angular2.txt",
|
||||||
|
"books/Beginning Amazon Web Services with Node.js.txt",
|
||||||
|
"books/BeginningAndroidGames.txt",
|
||||||
|
"books/BeginningJSON.txt",
|
||||||
|
"books/Big_Data_Analytics_with_R.txt",
|
||||||
|
"books/Big Data Analytics with Spark - A Practitioner's Guide to Using Spark for Large Scale Data Analysis.txt",
|
||||||
|
"books/Big Data, Data Mining and Machine Learning.txt",
|
||||||
|
"books/Big Data For Dummies.txt",
|
||||||
|
"books/Big Data Glossary.txt",
|
||||||
|
"books/Blockchain.txt",
|
||||||
|
"books/bookL.txt",
|
||||||
|
"books/book-no-solutions-aug-21-2014.txt",
|
||||||
|
"books/book.txt",
|
||||||
|
"books/BootstrapCookbook.txt",
|
||||||
|
"books/build-apis-you-wont-hate.txt",
|
||||||
|
"books/Building Adaptable Software with Microservices.txt",
|
||||||
|
"books/Building Evolutionary Architectures.txt",
|
||||||
|
"books/Building Hypermedia APIs with HTML5 and Node.txt",
|
||||||
|
"books/Building Hypermedia APIs with HTML5 and No - Mike Amundsen.txt",
|
||||||
|
"books/Building Machine Learning Projects with TensorFlow.txt",
|
||||||
|
"books/Building Machine Learning Systems with Python.txt",
|
||||||
|
"books/building-microservices-designing-fine-grained-systems.txt",
|
||||||
|
"books/Building Microservices.txt",
|
||||||
|
"books/buildingrestfulpythonwebservices - Unknown.txt",
|
||||||
|
"books/Building-web-apps-with-Node.js.txt",
|
||||||
|
"books/BuildingWebAppsWithNode.js.txt",
|
||||||
|
"books/Business Adventures - Twelve Classic Tales from the World of Wall Street.txt",
|
||||||
|
"books/Business Intelligence Data Mining and Optimization for Decision Making.txt",
|
||||||
|
"books/business-models-for-data-economy.txt",
|
||||||
|
"books/Can I Freeze It_ _ How to Use the Most Ver - Susie Theodorou.txt",
|
||||||
|
"books/Category Theory for Computer Science.txt",
|
||||||
|
"books/Category Theory for Computer Science - Unknown.txt",
|
||||||
|
"books/C++_CreatingGamesStepByStepGUIDE.txt",
|
||||||
|
"books/Chapter-13-Association-Rules.txt",
|
||||||
|
"books/Chapter-14-Cluster-Analysis.txt",
|
||||||
|
"books/Chapter-16-Regression-Based-Forecasting.txt",
|
||||||
|
"books/Chapter-1-Introduction.txt",
|
||||||
|
"books/Chapter-2-Overview-of-the-Data-Mining-Process.txt",
|
||||||
|
"books/Chapter-3-Data-Visualization.txt",
|
||||||
|
"books/Chapter 4_Dimension Reduction (Data Mining - Nitin R. Patel.txt",
|
||||||
|
"books/Chapter-4-Dimension-Reduction.txt",
|
||||||
|
"books/Chapter-5-Evaluating-Classification-and-Predictive-Performance.txt",
|
||||||
|
"books/Chapter-6-Multiple-Linear-Regression.txt",
|
||||||
|
"books/Chapter-7-k-Nearest-Neighbors--k-NN-.txt",
|
||||||
|
"books/Chapter-8-Naive-Bayes.txt",
|
||||||
|
"books/Chapter-9-Classification-and-Regression-Trees.txt",
|
||||||
|
"books/Christian Rudder-Dataclysm_ who we are (when we think no one's looking)-Crown (2014).txt",
|
||||||
|
"books/ciml-v0_9-all.txt",
|
||||||
|
"books/Classic Computer Science Problems in Python.txt",
|
||||||
|
"books/classicgamedesign.txt",
|
||||||
|
"books/classicshellscripting.txt",
|
||||||
|
"books/Clean Code - A Handbook of Agile Software Craftsmanship.txt",
|
||||||
|
"books/clean-coder-conduct-professional-programmers.txt",
|
||||||
|
"books/Clean Code.txt",
|
||||||
|
"books/cloudcomputingbasics_aselfteachingintroduction.txt",
|
||||||
|
"books/Cloud Native DevOps with Kubernetes.txt",
|
||||||
|
"books/Cloud Native Patterns - Designing change tolerant software.txt",
|
||||||
|
"books/Code Complete - A Practical Handbook of Software Construction.txt",
|
||||||
|
"books/Code Complete.txt",
|
||||||
|
"books/Collaborative filtering.txt",
|
||||||
|
"books/Collective Intelligence in Action.txt",
|
||||||
|
"books/Collective Intelligence.txt",
|
||||||
|
"books/Competing Against Luck - The Story of Innovation and Customer Choice.txt",
|
||||||
|
"books/Compilers-Principles-TechniquesAndTools2ndEdition.txt",
|
||||||
|
"books/Computational Intelligence.txt",
|
||||||
|
"books/Concurrency in Go_ Tools and Techniques fo - Katherine Cox-Buday.txt",
|
||||||
|
"books/Concurrency in Go - Tools and Techniques for Developers.txt",
|
||||||
|
"books/ConsumersurplusatUber_PR.txt",
|
||||||
|
"books/Contagious - Why Things Catch On.txt",
|
||||||
|
"books/Conversion_Optimization.txt",
|
||||||
|
"books/cover - Jolcia.txt",
|
||||||
|
"books/Cplusplus_Today.txt",
|
||||||
|
"books/cprogramming_aselfteachingintroduction.txt",
|
||||||
|
"books/C Programming - PhD Rajiv Chopra.txt",
|
||||||
|
"books/Create a Data Driven Organization.txt",
|
||||||
|
"books/Creating a Data-Driven Organization - Carl Anderson.txt",
|
||||||
|
"books/Crossing the Chasm - Marketing and Selling Disruptive Products to Mainstream Customers.txt",
|
||||||
|
"books/Crypto101.txt",
|
||||||
|
"books/CSharpProfesional.txt",
|
||||||
|
"books/C_Sharp_Succinctly.txt",
|
||||||
|
"books/CSS3_Succinctly.txt",
|
||||||
|
"books/CSS3.txt",
|
||||||
|
"books/CSS FlexBox.txt",
|
||||||
|
"books/CssGridLayout.txt",
|
||||||
|
"books/CSS in Depth.txt",
|
||||||
|
"books/CSSMaster2ndEdition.txt",
|
||||||
|
"books/csspocketreference.txt",
|
||||||
|
"books/CSS-Programming-Cookbook.txt",
|
||||||
|
"books/CSS_Secrets.txt",
|
||||||
|
"books/css_thedefinitiveguide.txt",
|
||||||
|
"books/CursoHTML5.txt",
|
||||||
|
"books/D3-Tips-and-Tricks.txt",
|
||||||
|
"books/Daily Rituals - How Great Minds Make Time, Find Inspiration, and Get to Work.txt",
|
||||||
|
"books/Dark Web Exploring and Data Mining the Dark Side of the Web.txt",
|
||||||
|
"books/Dart-A-Modern-Web-Language.txt",
|
||||||
|
"books/dart_in_action.txt",
|
||||||
|
"books/dart_programming_tutorial.txt",
|
||||||
|
"books/Data_Algorithms (1).txt",
|
||||||
|
"books/Data_Algorithms.txt",
|
||||||
|
"books/Data Analysis with Open Source Tools.txt",
|
||||||
|
"books/Data_Analytics_in_Sports.txt",
|
||||||
|
"books/Data_Analytics_with_Hadoop (1).txt",
|
||||||
|
"books/datacleaning_pocketprimer.txt",
|
||||||
|
"books/data-driven.txt",
|
||||||
|
"books/datalog2011-dedalus.txt",
|
||||||
|
"books/Data Mashups in R.txt",
|
||||||
|
"books/Data Mining and Statistics for Decision Making.txt",
|
||||||
|
"books/DataMining-ch1.txt",
|
||||||
|
"books/DataMining-ch2.txt",
|
||||||
|
"books/DataMining-ch3.txt",
|
||||||
|
"books/DataMining-ch4.txt",
|
||||||
|
"books/DataMining-ch4 - Unknown.txt",
|
||||||
|
"books/DataMining-ch5.txt",
|
||||||
|
"books/DataMining-ch6.txt",
|
||||||
|
"books/DataMining-ch7.txt",
|
||||||
|
"books/Datamining-ch8.txt",
|
||||||
|
"books/Data Mining Concepts and Techniques.txt",
|
||||||
|
"books/Data Mining - Concepts, Models, Methods, and Algorithms.txt",
|
||||||
|
"books/Data Mining Methods for Recommender Systems.txt",
|
||||||
|
"books/Data Mining - Practical Machine Learning Tools and Techniques.txt",
|
||||||
|
"books/datamining.txt",
|
||||||
|
"books/Data_Science_from_Scratch (1).txt",
|
||||||
|
"books/Data Science from Scratch - Joel Grus.txt",
|
||||||
|
"books/Data Science from Scratch.txt",
|
||||||
|
"books/Data_Science_from_Scratch.txt",
|
||||||
|
"books/Data Source Handbook.txt",
|
||||||
|
"books/Data Stream Mining - A Practical Approach.txt",
|
||||||
|
"books/Data Structures and Algorithms.txt",
|
||||||
|
"books/Data Structures and Algorithms with JavaScript.txt",
|
||||||
|
"books/datastyle.txt",
|
||||||
|
"books/Data Visualization with D3.js Cookbook.txt",
|
||||||
|
"books/datavisualizationwithpythonandjavascript.txt",
|
||||||
|
"books/Data_Visualization_with_Python_and_JavaScript.txt",
|
||||||
|
"books/data-wrangling-cheatsheet.txt",
|
||||||
|
"books/Data Wrangling with JavaScript.txt",
|
||||||
|
"books/Data_Wrangling_with_Python (1).txt",
|
||||||
|
"books/dbSecurityBook.txt",
|
||||||
|
"books/DE_0_PHYTON -.txt",
|
||||||
|
"books/Dealing with China - An Insider Unmasks the New Economic Superpower.txt",
|
||||||
|
"books/DebianHandBookSpanish.txt",
|
||||||
|
"books/Debugging Teams - Better Productivity through Collaboration.txt",
|
||||||
|
"books/Decision Support Systems For Business Intelligence.txt",
|
||||||
|
"books/Deep Work - Cal Newport.txt",
|
||||||
|
"books/Deep Work - Rules for Focused Success in a Distracted World.txt",
|
||||||
|
"books/Dependency Injection Principles, Practices, and Patterns.txt",
|
||||||
|
"books/Design Driven Testing.txt",
|
||||||
|
"books/Design for How People Think - Using Brain Science to Build Better Products.txt",
|
||||||
|
"books/Designing Data-Intensive Applications - The Big Ideas Behind Reliable, Scalable and Maintainable Systems.txt",
|
||||||
|
"books/Designing_Data_Intensive_Applications.txt",
|
||||||
|
"books/Designing Data-Intensive Web Applications.txt",
|
||||||
|
"books/Designing Data Visualizations.txt",
|
||||||
|
"books/Designing Interfaces - Patterns for Effective Interaction Design.txt",
|
||||||
|
"books/DesigningUXForms.txt",
|
||||||
|
"books/designingwebapis.txt",
|
||||||
|
"books/Designing with the Mind in Mind Simple Guide to Understanding User Interface Design Rules.txt",
|
||||||
|
"books/Designing with the Mind in Mind Simple Gui - Unknown.txt",
|
||||||
|
"books/DesignPatterns.txt",
|
||||||
|
"books/developer-testing.txt",
|
||||||
|
"books/Developing Large Web Applications.txt",
|
||||||
|
"books/developing-microservices-node-js.txt",
|
||||||
|
"books/Developing Microservices with Node.js.txt",
|
||||||
|
"books/devops-2-0-toolkit.txt",
|
||||||
|
"books/devops-2-1-toolkit-deploying-monitoring.txt",
|
||||||
|
"books/DevOps Automation Cookbook.txt",
|
||||||
|
"books/devops-web-development.txt",
|
||||||
|
"books/DiveIntoPython.txt",
|
||||||
|
"books/django2webdevelopmentcookbook.txt",
|
||||||
|
"books/docker-bootcamp.txt",
|
||||||
|
"books/DockerContainerizationCookbook.txt",
|
||||||
|
"books/docker-cookbook-solutions-examples.txt",
|
||||||
|
"books/docker-in-action.txt",
|
||||||
|
"books/Docker in Action.txt",
|
||||||
|
"books/docker-in-practice.txt",
|
||||||
|
"books/Docker in Practice.txt",
|
||||||
|
"books/docker-orchestration.txt",
|
||||||
|
"books/Docker_ Up and Running - Matthias, Karl.txt",
|
||||||
|
"books/Docker Up and Running.txt",
|
||||||
|
"books/Docker_Up_and_Running.txt",
|
||||||
|
"books/domain-driven-design-distilled.txt",
|
||||||
|
"books/DotNETCore.txt",
|
||||||
|
"books/ECMAScript_6.txt",
|
||||||
|
"books/effectiveawk.txt",
|
||||||
|
"books/Effective_DevOps.txt",
|
||||||
|
"books/EF JS sonsivri.txt",
|
||||||
|
"books/Elasticsearch Blueprints - A practical project-based guide to generating compelling search solutions using the dynamic and powerful features of Elasticsearch.txt",
|
||||||
|
"books/Elasticsearch Cookbook.txt",
|
||||||
|
"books/Elasticsearch in Action.txt",
|
||||||
|
"books/Elasticsearch Indexing - Improve search experiences with Elasticsearch's powerful indexing functionality.txt",
|
||||||
|
"books/Elasticsearch Server.txt",
|
||||||
|
"books/Elasticsearch - The Definitive Guide.txt",
|
||||||
|
"books/ElasticSearchTutorial.txt",
|
||||||
|
"books/elasticsearch.txt",
|
||||||
|
"books/ElectronGettingStarted.txt",
|
||||||
|
"books/Electron in Action.txt",
|
||||||
|
"books/ElectronQuickIntro.txt",
|
||||||
|
"books/Elegant_SciPy.txt",
|
||||||
|
"books/Elixir in Action.txt",
|
||||||
|
"books/Elm Accelerated - James Porter.txt",
|
||||||
|
"books/Eloquent_JavaScript.txt",
|
||||||
|
"books/ELS2015.txt",
|
||||||
|
"books/embeddedvision.txt",
|
||||||
|
"books/Emergent Web Intelligence Advanced Information Retrieval.txt",
|
||||||
|
"books/Emergent Web Intelligence Advanced Semantic Technologies.txt",
|
||||||
|
"books/Enterprise_Big_Data_Lake (1).txt",
|
||||||
|
"books/EntityFrameworkCodeFirst.txt",
|
||||||
|
"books/EntityFrameworkCore.txt",
|
||||||
|
"books/entrepreneur revolution.txt",
|
||||||
|
"books/Eric Ries - The Lean Startup.txt",
|
||||||
|
"books/ESLII_print10.txt",
|
||||||
|
"books/Essential JavaScript Design Patterns.txt",
|
||||||
|
"books/eurosys10-boom 2.txt",
|
||||||
|
"books/eurosys10-boom.txt",
|
||||||
|
"books/expert-javascript.txt",
|
||||||
|
"books/expertpythonprogramming - Unknown.txt",
|
||||||
|
"books/Exploring Design Pattern For Dummies.txt",
|
||||||
|
"books/express-in-action.txt",
|
||||||
|
"books/Facebook - A Focus on Efficieny.txt",
|
||||||
|
"books/fashioning-data.txt",
|
||||||
|
"books/fcdae.txt",
|
||||||
|
"books/Feature Engineering for Machine Learning - Principles and Techniques for Data Scientists.txt",
|
||||||
|
"books/FlaskReleaseMarch03-2017.txt",
|
||||||
|
"books/flaskwebdevelopment.txt",
|
||||||
|
"books/Fluent_Python.txt",
|
||||||
|
"books/Foundations_for_Analytics_with_Python.txt",
|
||||||
|
"books/Foundations for Architecting Data Solutions - Managing Successful Data Projects.txt",
|
||||||
|
"books/Framing-Analytics-Requirements-v5.13.txt",
|
||||||
|
"books/FRIED_Jason_-_Rework.txt",
|
||||||
|
"books/FullStackJsDevelopmentWithMEAN.txt",
|
||||||
|
"books/Fundamentals of Data Visualization - A Primer on Making Informative and Compelling Figures.txt",
|
||||||
|
"books/gamedevelopmentusingpython.txt",
|
||||||
|
"books/GameProgrammingForKids.txt",
|
||||||
|
"books/Gaussian Processes for Machine Learning - Carl Edward Rasmussen.txt",
|
||||||
|
"books/Getting_Data_Right_Ch04_PE_Tamr.txt",
|
||||||
|
"books/getting-started-kubernetes-2nd.txt",
|
||||||
|
"books/GettingStartedWithASP.Net4.5WebForms.txt",
|
||||||
|
"books/Getting Started with Kubernetes.txt",
|
||||||
|
"books/Getting Started with Kudu - Jean-Marc Spaggiari.txt",
|
||||||
|
"books/GettingStartedWithLINQPad.txt",
|
||||||
|
"books/GettingStartedWithReactJs.txt",
|
||||||
|
"books/Getting Started with RStudio.txt",
|
||||||
|
"books/Getting Started with Storm.txt",
|
||||||
|
"books/Getting Started with TensorFlow.txt",
|
||||||
|
"books/Git - Giant Undo Button.txt",
|
||||||
|
"books/Git Internals.txt",
|
||||||
|
"books/GitInternals.txt",
|
||||||
|
"books/Git Internals - Unknown.txt",
|
||||||
|
"books/Global UX Design and Research in a Connected World.txt",
|
||||||
|
"books/GNU_C_LibraryReferenceManual.txt",
|
||||||
|
"books/GNULinuxAdvancedAdminstration.txt",
|
||||||
|
"books/go-building-web-applications.txt",
|
||||||
|
"books/go-design-patterns.txt",
|
||||||
|
"books/go-in-action.txt",
|
||||||
|
"books/go-in-practice.txt",
|
||||||
|
"books/go-programming-blueprints-2nd.txt",
|
||||||
|
"books/Go Recipes.txt",
|
||||||
|
"books/go.txt",
|
||||||
|
"books/go-web-programming.txt",
|
||||||
|
"books/Graph Algorithms - Practical Examples in Apache Spark and Neo4j.txt",
|
||||||
|
"books/Graph Databases - Ian Robinson, Jim Webber.txt",
|
||||||
|
"books/Graph Databases.txt",
|
||||||
|
"books/GraphDatabases.txt",
|
||||||
|
"books/Graphics of Large Datasets.txt",
|
||||||
|
"books/greppocketref.txt",
|
||||||
|
"books/Grokking Deep Learning.txt",
|
||||||
|
"books/GrowthHacking.txt",
|
||||||
|
"books/gsl_stats.txt",
|
||||||
|
"books/Hackers and Painters.txt",
|
||||||
|
"books/Hadoop in the Enterprise - Architecture - A Guide to Successful Integration.txt",
|
||||||
|
"books/Hadoop_Security.txt",
|
||||||
|
"books/Hadoop_ The Definitive Guide - Tom White.txt",
|
||||||
|
"books/Hadoop - The Definitive Guide.txt",
|
||||||
|
"books/hadoop-what-you-need-to-know.txt",
|
||||||
|
"books/hadoop-with-python.txt",
|
||||||
|
"books/HAL.txt",
|
||||||
|
"books/Handbook_Pt1.txt",
|
||||||
|
"books/Handbook_Pt2.txt",
|
||||||
|
"books/Handbook_Pt3.txt",
|
||||||
|
"books/Handbook_Pt4.txt",
|
||||||
|
"books/Hands-On Design Patterns with React Native - Mateusz Grzesiukiewicz.txt",
|
||||||
|
"books/hands-onfullstackdevelopmentwithspringboot20andreact.txt",
|
||||||
|
"books/hands-onfullstackwebdevelopmentwithangular6andlaravel5.txt",
|
||||||
|
"books/Hands-on Machine Learning with Scikit-Lear - Aurelien Geron.txt",
|
||||||
|
"books/Hands-On Machine Learning with Scikit Learn and TensorFlow - Concepts, Tools, and Techniques to Build Intelligent Systems.txt",
|
||||||
|
"books/Hands_On_Machine_Learning_with_Scikit_Learn_and_TensorFlow.txt",
|
||||||
|
"books/Hands On Machine Learning with Scikit Learn, Keras, and Tensorflow - Concepts, Tools, and Techniques to Build Intelligent Systems (Updated Release).txt",
|
||||||
|
"books/hdlwithdigitaldesign.txt",
|
||||||
|
"books/HeadFirstC.txt",
|
||||||
|
"books/Healing With Herbs and Spices_ Heal Your B - Simone McGrath.txt",
|
||||||
|
"books/HelloAndroid.txt",
|
||||||
|
"books/highperformanceimages.txt",
|
||||||
|
"books/High Performance JavaScript.txt",
|
||||||
|
"books/High_Performance_Mobile_Web.txt",
|
||||||
|
"books/HowToBuildAndScaleWithMicroServices.txt",
|
||||||
|
"books/HowToBuildAndScaleWithMicroServices - Unknown.txt",
|
||||||
|
"books/How to Live Forever - Alok Jha.txt",
|
||||||
|
"books/How to Pass Exams - Dominic O'Brien.txt",
|
||||||
|
"books/HTML5 and JavaScript Web Apps.txt",
|
||||||
|
"books/HTML5 Architecture.txt",
|
||||||
|
"books/HTML5CanvasReference.txt",
|
||||||
|
"books/HTML5 Canvas.txt",
|
||||||
|
"books/HTML5 Cookbook.txt",
|
||||||
|
"books/HTML5 & CSS3 FOR THE REAL WORLD.txt",
|
||||||
|
"books/HTML5-Programming-Cookbook.txt",
|
||||||
|
"books/HTML5SecurityCheatSheet.txt",
|
||||||
|
"books/HTML5_Vulnerabilities.txt",
|
||||||
|
"books/htmlcss2sample.txt",
|
||||||
|
"books/HTTP - 2 in Action.txt",
|
||||||
|
"books/human javascript - Henrik Joreteg.txt",
|
||||||
|
"books/Human JavaScript.txt",
|
||||||
|
"books/Identity and Data Security for Web Development Best Practices.txt",
|
||||||
|
"books/I Heart Logs Event Data, Stream Processing, and Data Integration.txt",
|
||||||
|
"books/Information Architecture For the Web and Beyond.txt",
|
||||||
|
"books/Information_Architecture_Fourth_Edition.txt",
|
||||||
|
"books/Information Theory, Inference, and Learning Algorithms .txt",
|
||||||
|
"books/Innovations in Classification, Data Science, and Information Systems.txt",
|
||||||
|
"books/Interactive Data Visualization for the Web.txt",
|
||||||
|
"books/Interactive_Data_Visualization_for_the_Web.txt",
|
||||||
|
"books/Interactive Data Visualization for the Web - Unknown.txt",
|
||||||
|
"books/Interview Preparations Kit - Software Engineer.txt",
|
||||||
|
"books/IntouchScriptingAndLogicGuide.txt",
|
||||||
|
"books/Introducing-Go.txt",
|
||||||
|
"books/introducingregularexpressions.txt",
|
||||||
|
"books/introduction-machine-learning-python.txt",
|
||||||
|
"books/introductionto3dgameprogrammingwithdirectx12.txt",
|
||||||
|
"books/Introduction to Docker.txt",
|
||||||
|
"books/IntroductionToDocker.txt",
|
||||||
|
"books/IntroductionToLinux.txt",
|
||||||
|
"books/IntroductionToNginx.txt",
|
||||||
|
"books/IntroToCrypto.txt",
|
||||||
|
"books/InventYourOwnGamesWithPython.txt",
|
||||||
|
"books/IPSUR.txt",
|
||||||
|
"books/ISLR Fourth Printing.txt",
|
||||||
|
"books/Java2.txt",
|
||||||
|
"books/JavaDesignPatterns.txt",
|
||||||
|
"books/JavaDevelopmentOnLnx.txt",
|
||||||
|
"books/JavaFXCookBook.txt",
|
||||||
|
"books/Java-JDBC.txt",
|
||||||
|
"books/JavaMultithreadingAndConcurrency.txt",
|
||||||
|
"books/JavaNIOCookbook.txt",
|
||||||
|
"books/JavaPersistenceAPI.txt",
|
||||||
|
"books/JavaScript A Beginners Guide .txt",
|
||||||
|
"books/JavaScript Cookbook.txt",
|
||||||
|
"books/JavaScriptInterviewQuestions.txt",
|
||||||
|
"books/JavaScript Patterns.txt",
|
||||||
|
"books/JavaScript_Succinctly.txt",
|
||||||
|
"books/JavaScript The Definitive Guide.txt",
|
||||||
|
"books/javascript_the_good_parts.txt",
|
||||||
|
"books/JavaScript The Good Parts.txt",
|
||||||
|
"books/JavaScript Web Applications.txt",
|
||||||
|
"books/JavaStartingIntoHibernate.txt",
|
||||||
|
"books/Java-ThinkJava.txt",
|
||||||
|
"books/Jenkins 2 - Up and Running - Evolve Your Deployment Pipeline for Next Generation Automation.txt",
|
||||||
|
"books/jenkins-the-definitive-guide.txt",
|
||||||
|
"books/JQueryHost.txt",
|
||||||
|
"books/JQueryNoviceToNinja.txt",
|
||||||
|
"books/jQuery_Succinctly.txt",
|
||||||
|
"books/JsNoviceToNinja2ndEdition.txt",
|
||||||
|
"books/JsNoviceToNinja.txt",
|
||||||
|
"books/Jurans Quality Handbook.txt",
|
||||||
|
"books/Kafka Streams in Action - Real time apps and microservices with the Kafka Streaming API.txt",
|
||||||
|
"books/Kafka - The Definitive Guide - Real Time Data and Stream Processing at Scale.txt",
|
||||||
|
"books/Kafka - The Definitive Guide.txt",
|
||||||
|
"books/Kubernetes Cookbook.txt",
|
||||||
|
"books/Kubernetes in Action.txt",
|
||||||
|
"books/Kubernetes Management Design Patterns With Docker, CoreOS Linux, and Other Platforms.txt",
|
||||||
|
"books/Kubernetes Microservices with Docker.txt",
|
||||||
|
"books/Kubernetes-Microservices with Docker.txt",
|
||||||
|
"books/Kuhn_Johnson_Applied_Predictive_Modeling.txt",
|
||||||
|
"books/LaBibliaDeMySQL.pdf.txt",
|
||||||
|
"books/lazy-analysts-guide-to-faster-sql.txt",
|
||||||
|
"books/Lean_Analytics.txt",
|
||||||
|
"books/Lean Customer Development.txt",
|
||||||
|
"books/Lean Enterprise.txt",
|
||||||
|
"books/Lean UX.txt",
|
||||||
|
"books/Learn Functional Programming by Implementing SQL with Underscore.js Presentation.txt",
|
||||||
|
"books/Learning_Agile.txt",
|
||||||
|
"books/Learning Apache Kafka.txt",
|
||||||
|
"books/Learning Apache Kafka - Unknown.txt",
|
||||||
|
"books/Learning Chaos Engineering - Russ Miles.txt",
|
||||||
|
"books/learningconcurrencyinpython - Unknown.txt",
|
||||||
|
"books/Learning Docker.txt",
|
||||||
|
"books/Learning ELK Stack - Build mesmerizing visualizations, analytics, and logs from your data using Elasticsearch, Logstash, and Kibana.txt",
|
||||||
|
"books/learninggnuemacs_3rdedition.txt",
|
||||||
|
"books/learninggraphql.txt",
|
||||||
|
"books/LearningJavaByBuildingAndroidGames.txt",
|
||||||
|
"books/Learning.Java_Oreilly_4th.Edition_Jun.2013.txt",
|
||||||
|
"books/Learning Java - Patrick Niemeyer.txt",
|
||||||
|
"books/Learning JavaScript Design Patterns.txt",
|
||||||
|
"books/learningjavascript.txt",
|
||||||
|
"books/Learning Java.txt",
|
||||||
|
"books/learningjquery3.txt",
|
||||||
|
"books/learningnodejsdevelopment.txt",
|
||||||
|
"books/learningphpmysqlandjavascript.txt",
|
||||||
|
"books/Learning Python, 5th Edition.txt",
|
||||||
|
"books/Learning Python - Mark Lutz.txt",
|
||||||
|
"books/Learning Python - Powerful Object-Oriented Programming.txt",
|
||||||
|
"books/LearningPython.txt",
|
||||||
|
"books/learningreact1.txt",
|
||||||
|
"books/learningroboticsusingpython - Unknown.txt",
|
||||||
|
"books/Learning Single-page Web Application Development.txt",
|
||||||
|
"books/Learning Spark.txt",
|
||||||
|
"books/Learning_Spark.txt",
|
||||||
|
"books/Learning_Swift.txt",
|
||||||
|
"books/learningthebashshell_3rdedition.txt",
|
||||||
|
"books/learningtheviandvimeditors_7thedition.txt",
|
||||||
|
"books/Learning Website Development with Django.txt",
|
||||||
|
"books/learnqt5.txt",
|
||||||
|
"books/lecture-22.txt",
|
||||||
|
"books/Linear Algebra Explained In Four Pages.txt",
|
||||||
|
"books/Linear Algebra.txt",
|
||||||
|
"books/Linked Data - Evolving The Web Into A Global Data Space.txt",
|
||||||
|
"books/Linked Open Data - The Essentials.txt",
|
||||||
|
"books/Linux Bible.txt",
|
||||||
|
"books/Linux Colección completa (2004).txt",
|
||||||
|
"books/LinuxCommandLineSheet.txt",
|
||||||
|
"books/LinuxCookBook.pdf - Pankaj Kumar.txt",
|
||||||
|
"books/linuxdevicedrivers.txt",
|
||||||
|
"books/LinuxEmbeddedDevelopment.txt",
|
||||||
|
"books/Linux From Scratch.txt",
|
||||||
|
"books/LinuxFromScratch.txt",
|
||||||
|
"books/linuxinanutshell.txt",
|
||||||
|
"books/Linux Internals_ Como funciona - Daniel Ezquerra.txt",
|
||||||
|
"books/LinuxKali.txt",
|
||||||
|
"books/LinuxNetworkingCookbook.txt",
|
||||||
|
"books/linuxpocketguide_3rdedition.txt",
|
||||||
|
"books/Linux Pocket.txt",
|
||||||
|
"books/LinuxPracticalSecurityCookBook.txt",
|
||||||
|
"books/LinuxShellScripting.txt",
|
||||||
|
"books/linuxsystemprogramming.txt",
|
||||||
|
"books/LittleInferenceBook.txt",
|
||||||
|
"books/Machine Learning Cheat Sheet.txt",
|
||||||
|
"books/Machine learning for hackers.txt",
|
||||||
|
"books/Machine Learning for Hackers.txt",
|
||||||
|
"books/Machine_Learning_with_R_Second_Edition.txt",
|
||||||
|
"books/Machine Learning with Spark.txt",
|
||||||
|
"books/Machine Learning with TensorFlow.txt",
|
||||||
|
"books/Maintainable JavaScript.txt",
|
||||||
|
"books/Making Isometric Social Real-Time Games with HTML5 CSS3 and JavaScript.txt",
|
||||||
|
"books/Management 3.0; Leading Agile Developers, - Jurgen Appelo.txt",
|
||||||
|
"books/ManualDePowerBuilder.txt",
|
||||||
|
"books/ManualDeSEO.txt",
|
||||||
|
"books/mapping-big-data.txt",
|
||||||
|
"books/MapReduce Design Patterns - Building Effective Algorithms and Analytics for Hadoop and Other Systems.txt",
|
||||||
|
"books/Mastering-Advanced-Analytics-With-Apache-Spark.txt",
|
||||||
|
"books/Mastering_Dart__Master_the_art_of.txt",
|
||||||
|
"books/Mastering ElasticSearch - Extend your knowledge on ElasticSearch, and querying and data handling, along with its internal workings.txt",
|
||||||
|
"books/masteringios12programming.txt",
|
||||||
|
"books/Mastering Kubernetes.txt",
|
||||||
|
"books/masteringmodularjavascript.txt",
|
||||||
|
"books/Mastering Modular JavaScript.txt",
|
||||||
|
"books/masteringpythonnetworking - Unknown.txt",
|
||||||
|
"books/masteringpython - Unknown.txt",
|
||||||
|
"books/masteringregularexpressions.txt",
|
||||||
|
"books/Mastering Regular Expressions.txt",
|
||||||
|
"books/Mastering Web Application Development with Express.txt",
|
||||||
|
"books/masteringxamarinuidevelopment.txt",
|
||||||
|
"books/mesos-in-action.txt",
|
||||||
|
"books/microservices-building-scalable-software.txt",
|
||||||
|
"books/microservices-deployment-cookbook.txt",
|
||||||
|
"books/Microservices Designing Deploying.txt",
|
||||||
|
"books/microservices-docker-microsoft-azure.txt",
|
||||||
|
"books/microservices-flexible-software-architecture.txt",
|
||||||
|
"books/microservices-from-day-one.txt",
|
||||||
|
"books/Microservices Patterns - With examples in Java.txt",
|
||||||
|
"books/microsoftaccess2019programmingwithvbaxmlandasp.txt",
|
||||||
|
"books/microsoftexcel2019programmingwithvbaxmlandasp.txt",
|
||||||
|
"books/microsoftexcelfunctionsandformulas_5e.txt",
|
||||||
|
"books/microsoftoffice2013_365andbeyond.txt",
|
||||||
|
"books/Mining Business Databases.txt",
|
||||||
|
"books/Mining of Data with Complex Structures.txt",
|
||||||
|
"books/Mining of Massive Datasets.txt",
|
||||||
|
"books/Mining Text Data.txt",
|
||||||
|
"books/Mining_the_Social_Web__Second_Edition (1).txt",
|
||||||
|
"books/Mining_the_Social_Web__Second_Edition.txt",
|
||||||
|
"books/Mining the Social Web.txt",
|
||||||
|
"books/Modeling With Data.txt",
|
||||||
|
"books/Modern Java in Action - Lambda, streams, functional and reactive programming.txt",
|
||||||
|
"books/ModernJs.txt",
|
||||||
|
"books/modernpythoncookbook - Unknown.txt",
|
||||||
|
"books/MongoDB3.txt",
|
||||||
|
"books/MongoDB - Applied Design Patterns, Practical Use Cases with the Leading NoSQL Database.txt",
|
||||||
|
"books/MongoDB Applied Design Patterns - Rick Copeland.txt",
|
||||||
|
"books/MongoDB Cookbook.txt",
|
||||||
|
"books/MongoDB - The Definitive Guide.txt",
|
||||||
|
"books/MongoDBTheDefinitiveGuide.txt",
|
||||||
|
"books/Monitoring with Graphite - Jason Dixon.txt",
|
||||||
|
"books/msexcel2016.txt",
|
||||||
|
"books/multimediawebdesignanddevelopment.txt",
|
||||||
|
"books/MySQLPluginDevelopmen.txt",
|
||||||
|
"books/native-docker-clustering-swarm.txt",
|
||||||
|
"books/Natural Language Annotation for Machine Learning.txt",
|
||||||
|
"books/Natural_Language_Annotation_for_Machine_Learning.txt",
|
||||||
|
"books/Natural Language Processing in Action - Understanding, analyzing, and generating text with Python.txt",
|
||||||
|
"books/Natural Language Processing with PyTorch - Build Intelligent Language Applications Using Deep Learning.txt",
|
||||||
|
"books/negron-muntaner-jennifers-butt.txt",
|
||||||
|
"books/NetworkProgrammingIndotNET.txt",
|
||||||
|
"books/Network_Security_Through_Data_Analysis.txt",
|
||||||
|
"books/New Trends in Computational Collective Intelligence.txt",
|
||||||
|
"books/Nodedotjs_Web_Development_Third_Edition.txt",
|
||||||
|
"books/Node for Front-End Developers.txt",
|
||||||
|
"books/NodeJsAdvancedGuide.txt",
|
||||||
|
"books/Node.js By Example.txt",
|
||||||
|
"books/Node.js Design Patterns.txt",
|
||||||
|
"books/Node.js in Action.txt",
|
||||||
|
"books/Node.js Recipes.txt",
|
||||||
|
"books/Node.js the Right Way.txt",
|
||||||
|
"books/NodeJs.txt",
|
||||||
|
"books/Node Up and Running.txt",
|
||||||
|
"books/Node- Up and Running.txt",
|
||||||
|
"books/NoSQLArchitectsGuide.txt",
|
||||||
|
"books/NoSQL Database Technology - A Survey and Comparison of Systems.txt",
|
||||||
|
"books/OraclePL-SQL3Edition.txt",
|
||||||
|
"books/OraclePL-SQL.txt",
|
||||||
|
"books/Oreilly.Beautiful.Data.Jul.2009.txt",
|
||||||
|
"books/O'Reilly Media -- Template for Microsoft W - na na.txt",
|
||||||
|
"books/OReilly.REST.API.Design.Rulebook.Oct.2011.ISBN.1449310508.txt",
|
||||||
|
"books/OReilly Twisted Network Programming Essentials 2nd Edition 2013.txt",
|
||||||
|
"books/Organizational_Profiles.txt",
|
||||||
|
"books/out-of-the-tar-pit.txt",
|
||||||
|
"books/PatternDesignInC++WithQt4.txt",
|
||||||
|
"books/PHP-And-MySql-NoviceToNinja.txt",
|
||||||
|
"books/Postgres.txt",
|
||||||
|
"books/Practical Cloud Security - A Guide for Secure Design and Deployment.txt",
|
||||||
|
"books/practicaldatacleaning.txt",
|
||||||
|
"books/Practical Machine Learning Tools and Techniques.txt",
|
||||||
|
"books/Practical Machine Learning.txt",
|
||||||
|
"books/Practical Node.js.txt",
|
||||||
|
"books/Practical Recommender Systems.txt",
|
||||||
|
"books/Practical Semantic Web and Linked Data Applications.txt",
|
||||||
|
"books/Practical_Statistics_for_Data_Scientists.txt",
|
||||||
|
"books/PrincipiosDeCompiladores1EraEdicion.txt",
|
||||||
|
"books/Principles of Data Quality.txt",
|
||||||
|
"books/Privacy and Big Data.txt",
|
||||||
|
"books/ProbStatBook.txt",
|
||||||
|
"books/pro-continuous-delivery-jenkins-2.txt",
|
||||||
|
"books/pro-docker.txt",
|
||||||
|
"books/Production Ready Microservices.txt",
|
||||||
|
"books/Pro Express.js.txt",
|
||||||
|
"books/Professional Node.js.txt",
|
||||||
|
"books/proGit.txt",
|
||||||
|
"books/Pro GIT.txt",
|
||||||
|
"books/ProgramacionEnC.txt",
|
||||||
|
"books/Programmable Logic Controller - Basic Prin - Lab-Volt.txt",
|
||||||
|
"books/Programming Hive - Edward Capriolo, Dean Wampler.txt",
|
||||||
|
"books/Programming HTML5 Applications.txt",
|
||||||
|
"books/Programming Kubernetes - michael Hausenblas.txt",
|
||||||
|
"books/Programming_Pig_Second_Edition.txt",
|
||||||
|
"books/Programming_Scala_Second_Edition.txt",
|
||||||
|
"books/Programming The Semantic Web.txt",
|
||||||
|
"books/Pro HTML5 Programming.txt",
|
||||||
|
"books/Pro JavaScript Design Patterns.txt",
|
||||||
|
"books/Pro .NET 2.0 Graphics Programming.txt",
|
||||||
|
"books/Pro Node.js for Developers.txt",
|
||||||
|
"books/Pro React.txt",
|
||||||
|
"books/Pro REST API Development with Node.js.txt",
|
||||||
|
"books/pro-vim-2014.txt",
|
||||||
|
"books/pynput.txt",
|
||||||
|
"books/py-quant-econ.txt",
|
||||||
|
"books/Python3CookBook.txt",
|
||||||
|
"books/python3_pocketprimer.txt",
|
||||||
|
"books/PythonBeginnerCheatSheet.txt",
|
||||||
|
"books/Python Cookbook, 2nd Edition.txt",
|
||||||
|
"books/Python Cookbook, 3rd Edition.txt",
|
||||||
|
"books/python-crash-course.txt",
|
||||||
|
"books/pythondataanalysiscookbook - Unknown.txt",
|
||||||
|
"books/Python Data Science Essentials.txt",
|
||||||
|
"books/pythondatascienceessentials - Unknown.txt",
|
||||||
|
"books/pythondatastructuresandalgorithms - Unknown.txt",
|
||||||
|
"books/Python Essential Reference.txt",
|
||||||
|
"books/PythonEssentialsCheatSheet.txt",
|
||||||
|
"books/Python for Data Analysis.txt",
|
||||||
|
"books/Python_for_Finance.txt",
|
||||||
|
"books/PythonGamesDevelopmentForBeginners.txt",
|
||||||
|
"books/Python GUI Programming Cookbook - Second Edition.txt",
|
||||||
|
"books/pythonguiprogrammingcookbook - Unknown.txt",
|
||||||
|
"books/Python GUI programming with Tkinter ( PDFDrive.com ) (2).txt",
|
||||||
|
"books/pythonhighperformance - Unknown.txt",
|
||||||
|
"books/Python_introduction.txt",
|
||||||
|
"books/Python Machine Learning Blueprints.txt",
|
||||||
|
"books/Python Machine Learning.txt",
|
||||||
|
"books/Python_Machine_Learning.txt",
|
||||||
|
"books/pythonmachinelearning - Unknown.txt",
|
||||||
|
"books/PythonMakingGamesWithPygame.txt",
|
||||||
|
"books/pythonmicroservicesdevelopment - Unknown.txt",
|
||||||
|
"books/Python-NetworkHacks.txt",
|
||||||
|
"books/python-pocket-reference-5th-edition.txt",
|
||||||
|
"books/pythonprogrammingwithraspberrypi - Unknown.txt",
|
||||||
|
"books/PythonTestingBeginnerGuide.txt",
|
||||||
|
"books/Python.Tkinter.Programming.txt",
|
||||||
|
"books/python-tricks.txt",
|
||||||
|
"books/PyWebScrapingBook.txt",
|
||||||
|
"books/Qt5 Python GUI Programming Cookbook_ Building responsive and powerful cross-platform applications with PyQt ( PDFDrive.com ).txt",
|
||||||
|
"books/quality-code-software-testing-principles-practices-and-patterns.txt",
|
||||||
|
"books/radziwill_statisticseasierwithr_preview.txt",
|
||||||
|
"books/randomforest2001.txt",
|
||||||
|
"books/R Cookbook - JD Long.txt",
|
||||||
|
"books/R_Cookbook.txt",
|
||||||
|
"books/R Data Structures and Algorithms.txt",
|
||||||
|
"books/R Deep Learning Cookbook.txt",
|
||||||
|
"books/reactandreactnative.txt",
|
||||||
|
"books/reactdesignpatternsandbestpractices.txt",
|
||||||
|
"books/Reactive Applications with Akka.Net.txt",
|
||||||
|
"books/Reactive Design Patterns.txt",
|
||||||
|
"books/ReactJs.txt",
|
||||||
|
"books/reactnativecookbook_ward.txt",
|
||||||
|
"books/React Native in Action.txt",
|
||||||
|
"books/REACT.txt",
|
||||||
|
"books/Real_Time_Big_Data_Analytics.txt",
|
||||||
|
"books/Real-World_Hadoop_MapR.txt",
|
||||||
|
"books/Redis Essentials.txt",
|
||||||
|
"books/Redis Essentials - Unknown.txt",
|
||||||
|
"books/Redis in Action.txt",
|
||||||
|
"books/Refactoring Improving the Design of Existing Code.txt",
|
||||||
|
"books/Regular Expression Pocket Reference.txt",
|
||||||
|
"books/Regular Expression Pocket Reference - Unknown.txt",
|
||||||
|
"books/Regular Expressions Cookbook.txt",
|
||||||
|
"books/RegularExpressions_Succinctly.txt",
|
||||||
|
"books/Relevant Search_ With applications for Sol - Doug Turnbull John Berryman.txt",
|
||||||
|
"books/Relevant Search - With applications for Solr and Elasticsearch.txt",
|
||||||
|
"books/ResponsiveDesign.txt",
|
||||||
|
"books/Responsive Web Design.txt",
|
||||||
|
"books/Responsive Web Design with HTML5 and CSS3.txt",
|
||||||
|
"books/rest-advanced-research-topics-and-practical-applications.txt",
|
||||||
|
"books/RESTful Java Patterns and Best Practices.txt",
|
||||||
|
"books/RESTful Java Web Services Security.txt",
|
||||||
|
"books/RESTful Java with JAX-RS 2.0, 2nd Edition.txt",
|
||||||
|
"books/RESTful Web API Design with Node.js.txt",
|
||||||
|
"books/RESTful Web APIs.txt",
|
||||||
|
"books/RESTful Web Clients - Enabling Reuse Through Hypermedia.txt",
|
||||||
|
"books/RESTful_Web_Services.txt",
|
||||||
|
"books/RESTful Web Services with Dropwizard.txt",
|
||||||
|
"books/Rexer_Analytics_2013_Data_Miner_Survey_Summary_Report.txt",
|
||||||
|
"books/RFP Proyecto CRM - Herve Cayard.txt",
|
||||||
|
"books/R_in_Action_Second__v15_MEAP.txt",
|
||||||
|
"books/R in a Nutshell, 2nd Edition.txt",
|
||||||
|
"books/Roy Cohn Part 01 of 01.txt",
|
||||||
|
"books/R_Packages.txt",
|
||||||
|
"books/R_ProgrammingSuccinctly.txt",
|
||||||
|
"books/rprogramming.txt",
|
||||||
|
"books/running-lean-iterate-from-plan-a-to-a-plan-that-works-lean-series.txt",
|
||||||
|
"books/RW.txt",
|
||||||
|
"books/Rxjs in Action.txt",
|
||||||
|
"books/Scala_Cookbook.txt",
|
||||||
|
"books/scala-test-driven-development.txt",
|
||||||
|
"books/Schema Matching and Mapping.txt",
|
||||||
|
"books/Secrets of the JavaScript Ninja.txt",
|
||||||
|
"books/Securing Devops - Safe Services in the Cloud.txt",
|
||||||
|
"books/sedandawk.txt",
|
||||||
|
"books/Semantic Web for the Working Ontologist.txt",
|
||||||
|
"books/Semantic Web for the Working Ontologist - Unknown.txt",
|
||||||
|
"books/Semantic Web Programming.txt",
|
||||||
|
"books/Semantic Web Services For Web Databases.txt",
|
||||||
|
"books/Semantic Web Services.txt",
|
||||||
|
"books/Semantic Web Technologies for Business Intelligence.txt",
|
||||||
|
"books/Site Reliability Engineering - How Google Runs Production Systems.txt",
|
||||||
|
"books/Slides - Communicating to Company.txt",
|
||||||
|
"books/Slides - How to Market.txt",
|
||||||
|
"books/Slides - How to Turn Feature Ideas Into User Stories.txt",
|
||||||
|
"books/Slides - Talking to Customers.txt",
|
||||||
|
"books/Slides - User Stories to Actual Features.txt",
|
||||||
|
"books/Slides - What do Product Managers Do.txt",
|
||||||
|
"books/Slides - What I Did As a Product Manager.txt",
|
||||||
|
"books/Slides - What Is Agile Development.txt",
|
||||||
|
"books/Slides - WhosOnTheTeam.txt",
|
||||||
|
"books/Slides - Working With Developers.txt",
|
||||||
|
"books/SLS_corrected_1.4.16.txt",
|
||||||
|
"books/SmashingNodeJs.txt",
|
||||||
|
"books/socc2012_bloom_lattices.txt",
|
||||||
|
"books/Social Data Mining.txt",
|
||||||
|
"books/softwarearchitecturewithpython - Unknown.txt",
|
||||||
|
"books/SoftwareDesignPatterns.txt",
|
||||||
|
"books/software-paradox.txt",
|
||||||
|
"books/software takes command.txt",
|
||||||
|
"books/softwaretestingprinciplesandpractices.txt",
|
||||||
|
"books/SoftwareTesting.txt",
|
||||||
|
"books/SolidPrinciples.txt",
|
||||||
|
"books/S.O.L.I.D_Principles.txt",
|
||||||
|
"books/Spark in Action.txt",
|
||||||
|
"books/Spark - The Definitive Guide - Big Data Processing Made Simple.txt",
|
||||||
|
"books/spatialEpiBook.txt",
|
||||||
|
"books/Speed Reading for Professionals - Mantesh.txt",
|
||||||
|
"books/spring5designpatterns.txt",
|
||||||
|
"books/SQL.Cookbook.2005.txt",
|
||||||
|
"books/StartingIntoAzure.txt",
|
||||||
|
"books/StartingIntoCouchDB.txt",
|
||||||
|
"books/StartingIntoGIT.txt",
|
||||||
|
"books/StartingIntoHTML5.txt",
|
||||||
|
"books/StartingIntoIonic.txt",
|
||||||
|
"books/StartingIntoMySQL.txt",
|
||||||
|
"books/StartingIntoNodeJs.txt",
|
||||||
|
"books/StartingIntoPHPEnvironment.txt",
|
||||||
|
"books/StartingIntoPLC_Programming.txt",
|
||||||
|
"books/StartingIntoXamarinForms.txt",
|
||||||
|
"books/StartUpBestPractices.txt",
|
||||||
|
"books/steve_jobs_walter_isaacson.txt",
|
||||||
|
"books/Streaming Data - Understanding the Real Time Pipeline.txt",
|
||||||
|
"books/Stunning CSS3 A project-based guide to the latest in CSS.txt",
|
||||||
|
"books/Swift.txt",
|
||||||
|
"books/t._cormen_-_introduction_to_algorithms_3rd_edition.txt",
|
||||||
|
"books/tdd-ebook-sample.txt",
|
||||||
|
"books/tensorflow2.txt",
|
||||||
|
"books/TensorFlow for Machine Intelligence - A Hands-On Introduction to Learning Algorithms.txt",
|
||||||
|
"books/TensorFlow Machine Learning Cookbook.txt",
|
||||||
|
"books/Testable JavaScript.txt",
|
||||||
|
"books/Test-Driven JavaScript Development 2.txt",
|
||||||
|
"books/Test Driven.txt",
|
||||||
|
"books/Testing Angular Applications.txt",
|
||||||
|
"books/Text Mining Classification, Clustering, and Applications.txt",
|
||||||
|
"books/TextMiningO.txt",
|
||||||
|
"books/TheArt&ScienceOfJS.txt",
|
||||||
|
"books/The Bastard Operator From Hell.txt",
|
||||||
|
"books/TheBeginnersGuideToNoSQL.txt",
|
||||||
|
"books/The Clean Coder - A Code of Conduct for Professional Programmers.txt",
|
||||||
|
"books/The CSS3 Anthology.txt",
|
||||||
|
"books/The Dart Programming Language.txt",
|
||||||
|
"books/The Data Analytics Handbook.txt",
|
||||||
|
"books/The Data Science Book.txt",
|
||||||
|
"books/The Design of Everyday Things.txt",
|
||||||
|
"books/The DevOps 2.0 Toolkit - Automating the Continuous Deployment Pipeline with Containerized Microservices.txt",
|
||||||
|
"books/The DevOps Adoption Playbook - A Guide to Adopting DevOps in a Multi-Speed IT Enterprise.txt",
|
||||||
|
"books/the-docker-book.txt",
|
||||||
|
"books/The Elements of Statistical Learning - Data Mining, Inference, and Prediction.txt",
|
||||||
|
"books/The Elements of Statistical Learning.txt",
|
||||||
|
"books/The Enterprise Big Data Lake - Delivering the Promise of Big Data and Data Science.txt",
|
||||||
|
"books/The Essential Guide to User Interface Design.txt",
|
||||||
|
"books/The_GNU_Debbuger.txt",
|
||||||
|
"books/The.Go.Programming.Language.txt",
|
||||||
|
"books/The Grammar of Graphics.txt",
|
||||||
|
"books/TheGuideToWireFraming.txt",
|
||||||
|
"books/The Laws of Simplicity.txt",
|
||||||
|
"books/the-lean-mindset-ask-the-right-questions.txt",
|
||||||
|
"books/The Lean Startup - How Today's Entrepreneurs Use Continuous Innovation to Create Radically Successful Businesses.txt",
|
||||||
|
"books/TheLinuxDevelopmentPlatform.txt",
|
||||||
|
"books/TheLinuxKernelModuleProgrammingGuid.txt",
|
||||||
|
"books/TheLinuxProgrammingInterface.txt",
|
||||||
|
"books/The Lion Way - Machine Learning plus Intelligent Optimization.txt",
|
||||||
|
"books/The Meaning of Tingo_ And Other Extraordin - Adam Jacot De Boinod.txt",
|
||||||
|
"books/The Minto Pyramid Principle - Logic in Writing, Thinking, & Problem Solving.txt",
|
||||||
|
"books/The Pragmatic Programmer From Journeyman to Master.txt",
|
||||||
|
"books/The Pragmatic Programmer.txt",
|
||||||
|
"books/The Principles of Beautiful Web Design.txt",
|
||||||
|
"books/The Site Reliability Workbook - Practical Ways to Implement SRE.txt",
|
||||||
|
"books/The Startup Owner s Manual_ The Step-by-Step Guide for Building a Great Company - Blank, Steve.txt",
|
||||||
|
"books/TheUltimateGuideToPrototyping.txt",
|
||||||
|
"books/Think Bayes - Bayesian Statistics Made Simple.txt",
|
||||||
|
"books/thinkbayes.txt",
|
||||||
|
"books/thinkcomplexity.txt",
|
||||||
|
"books/Think Like a Data Scientist. Tackle the data science process step by step.txt",
|
||||||
|
"books/Think Like a Programmer - An Intro. to Creative Problem Solving - V. Spraul (No Starch, 2012) BBS.txt",
|
||||||
|
"books/thinkpython.txt",
|
||||||
|
"books/Think Python.txt",
|
||||||
|
"books/thinkstats2.txt",
|
||||||
|
"books/Think Stats - Allen B. Downey.txt",
|
||||||
|
"books/Think Stats - Exploratory Data Analysis in Python.txt",
|
||||||
|
"books/thinkstats.txt",
|
||||||
|
"books/Think Stats.txt",
|
||||||
|
"books/Third-Party JavaScript.txt",
|
||||||
|
"books/tmux-taster-2014.txt",
|
||||||
|
"books/Transactions on Computational Collective I - Ngoc Thanh Nguyen (Editor).txt",
|
||||||
|
"books/Transactions on Computational Collective Intelligence III.txt",
|
||||||
|
"books/Transactions on Computational Collective Intelligence II.txt",
|
||||||
|
"books/Transactions on Computational Collective Intelligence I.txt",
|
||||||
|
"books/Transactions on Computational Collective Intelligence V.txt",
|
||||||
|
"books/Twitter_Bootstrap3_Succinctly.txt",
|
||||||
|
"books/TypeScript Design Patterns.txt",
|
||||||
|
"books/TypeScript.txt",
|
||||||
|
"books/UbuntuServerGuide.txt",
|
||||||
|
"books/understanding-chief-data-officer.txt",
|
||||||
|
"books/Understanding Computation - From Simple Machines to Impossible Programs.txt",
|
||||||
|
"books/UnderstandingDocker.txt",
|
||||||
|
"books/UnderstandingLinuxKernel3erEdition.txt",
|
||||||
|
"books/Understanding the Chief Data Officer - Unknown.txt",
|
||||||
|
"books/UnityGameDevelopment.txt",
|
||||||
|
"books/university-startups-and-spin-offs-guide-for-entrepreneurs-in-academia.txt",
|
||||||
|
"books/unixpowertools.txt",
|
||||||
|
"books/Unknown - Unknown.txt",
|
||||||
|
"books/User Interface Design for Programmers.txt",
|
||||||
|
"books/User Story Mapping - Discover the Whole Story, Build the Right Product.txt",
|
||||||
|
"books/using-asyncio-python-understanding-asynchronous.txt",
|
||||||
|
"books/Using AWS Lambda and Claudia.js.txt",
|
||||||
|
"books/using-docker.txt",
|
||||||
|
"books/Using Node.js for UI Testing.txt",
|
||||||
|
"books/usingsvgwithcss3andhtml5.txt",
|
||||||
|
"books/UX for Leaan Startups.txt",
|
||||||
|
"books/UX_Strategy.txt",
|
||||||
|
"books/VBAProfessionalTipsSecrets.txt",
|
||||||
|
"books/Version Control by Example.txt",
|
||||||
|
"books/Visualizing Data.txt",
|
||||||
|
"books/vuejs2designpatternsandbestpractices.txt",
|
||||||
|
"books/VueJs2.txt",
|
||||||
|
"books/vuejs_upandrunning.txt",
|
||||||
|
"books/Web Crawling and Data Mining with Apache Nutch.txt",
|
||||||
|
"books/Web Data Mining.txt",
|
||||||
|
"books/Web Development Recipes.txt",
|
||||||
|
"books/webdevelopmentwithdjangocookbook - Unknown.txt",
|
||||||
|
"books/Web Development with Node and Express.txt",
|
||||||
|
"books/Web Information Retrieval.txt",
|
||||||
|
"books/Web Mining and Social Networking Techniques and Applications.txt",
|
||||||
|
"books/Web Scraping with Python - Collecting More Data from the Modern Web.txt",
|
||||||
|
"books/why-startups-fail-and-how-yours-can-succeed.txt",
|
||||||
|
"books/Wiley - Pairs Trading - Quantitative Methods and Analysis.txt",
|
||||||
|
"books/wordpress5complete.txt",
|
||||||
|
"books/WPF.txt",
|
||||||
|
"books/youdontknowjs_es6andbeyond.txt",
|
||||||
|
"books/youdontknowjs_scopeandclosures.txt",
|
||||||
|
"books/youdontknowjs_upandgoing.txt",
|
||||||
|
"books/zero-one.txt",
|
||||||
|
"books/ZooKeeper - Distributed process coordination.txt",
|
||||||
|
];
|
||||||
@ -3,7 +3,7 @@
|
|||||||
|
|
||||||
#![allow(dead_code)]
|
#![allow(dead_code)]
|
||||||
|
|
||||||
mod books;
|
//mod books;
|
||||||
|
|
||||||
use rayon::prelude::*;
|
use rayon::prelude::*;
|
||||||
use std::cell::RefCell;
|
use std::cell::RefCell;
|
||||||
@ -183,7 +183,8 @@ fn analyze(text: &[u8], stats: &mut Stats) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
let word = &text[word_start..idx];
|
let word = &text[word_start..idx];
|
||||||
|
// let word = unsafe { &text.get_unchecked(word_start..idx) };
|
||||||
|
|
||||||
// dbg!(str::from_utf8(word).unwrap());
|
// dbg!(str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
words += 1;
|
words += 1;
|
||||||
|
|||||||
155
12bitfloat_rust/risspam/src/main_old.rs
Normal file
155
12bitfloat_rust/risspam/src/main_old.rs
Normal file
@ -0,0 +1,155 @@
|
|||||||
|
#![feature(let_chains)]
|
||||||
|
|
||||||
|
use rayon::prelude::*;
|
||||||
|
//use rayon::prelude::*;
|
||||||
|
use std::{env, fs};
|
||||||
|
|
||||||
|
fn clean_content(content: &str) -> String {
|
||||||
|
let alloed_ichars = "01234567891abcdefghijklmnopqrstuvwxyz \n.,!?";
|
||||||
|
|
||||||
|
let clean_content = content.chars()
|
||||||
|
.filter(|&c| alloed_ichars.contains(c))
|
||||||
|
.collect::<String>();
|
||||||
|
|
||||||
|
clean_content
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_sentences(content: &str) -> usize {
|
||||||
|
let sentences = content.split('.')
|
||||||
|
.map(|s| s.trim_start()) // Remove leading whitespace
|
||||||
|
.count();
|
||||||
|
|
||||||
|
// // Remove last "sentence" if didn't end with a dot
|
||||||
|
// if let Some(last) = sentences.last() && !last.ends_with('.') {
|
||||||
|
// sentences.pop();
|
||||||
|
// }
|
||||||
|
|
||||||
|
sentences
|
||||||
|
}
|
||||||
|
|
||||||
|
fn get_words(content: &str, words: &mut usize, caps: &mut usize, fw: &mut usize) {
|
||||||
|
fn check_forbidden(w: &str) -> bool {
|
||||||
|
FORBIDDEN_WORDS.iter()
|
||||||
|
.find(|fw| str::eq_ignore_ascii_case(w, fw))
|
||||||
|
.is_some()
|
||||||
|
}
|
||||||
|
|
||||||
|
for word in content.split_whitespace() {
|
||||||
|
*words += 1;
|
||||||
|
|
||||||
|
if is_fully_capitalized_word(word) {
|
||||||
|
*caps += 1;
|
||||||
|
}
|
||||||
|
if check_forbidden(word) {
|
||||||
|
*fw += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn is_fully_capitalized_word(word: &str) -> bool {
|
||||||
|
word.chars()
|
||||||
|
.all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase())
|
||||||
|
}
|
||||||
|
|
||||||
|
//fn get_capitalized_words(content: &str) -> usize {
|
||||||
|
// let sentences = get_sentences(content);
|
||||||
|
//// let mut cap_words = vec![];
|
||||||
|
// let mut count = 0;
|
||||||
|
//
|
||||||
|
// for sentence in sentences {
|
||||||
|
// // Always skip the first word since sentences start with
|
||||||
|
// for word in get_words(sentence).skip(1) {
|
||||||
|
// if is_fully_capitalized_word(word) {
|
||||||
|
// count += 1;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// count
|
||||||
|
//}
|
||||||
|
|
||||||
|
fn get_numbers(clean_content: &str) -> usize {
|
||||||
|
clean_content.split(|c: char| !c.is_ascii_digit())
|
||||||
|
.count()
|
||||||
|
}
|
||||||
|
|
||||||
|
//fn get_forbidden_words(content: &str) -> usize {
|
||||||
|
// fn check_forbidden(w: &str) -> bool {
|
||||||
|
// FORBIDDEN_WORDS.iter()
|
||||||
|
// .find(|fw| str::eq_ignore_ascii_case(w, fw))
|
||||||
|
// .is_some()
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// get_words(content)
|
||||||
|
// .filter(|w| check_forbidden(w))
|
||||||
|
// .collect()
|
||||||
|
//}
|
||||||
|
|
||||||
|
fn analyze(data: &str) {
|
||||||
|
let clean_data = clean_content(data);
|
||||||
|
// drop(clean_data); // You aren't actually using clean_data :O
|
||||||
|
|
||||||
|
// All capitalized words
|
||||||
|
let mut words = 0;
|
||||||
|
let mut fw = 0;
|
||||||
|
let mut cap_words = 0;
|
||||||
|
get_words(&clean_data, &mut words, &mut fw, &mut cap_words);
|
||||||
|
|
||||||
|
println!("All capitalized words: {}", cap_words);
|
||||||
|
|
||||||
|
// All sentences
|
||||||
|
let sentences = get_sentences(data);
|
||||||
|
println!("Sentences: {}", sentences);
|
||||||
|
|
||||||
|
// All words
|
||||||
|
println!("Words: {}", words);
|
||||||
|
|
||||||
|
// Numbers
|
||||||
|
let numbers = get_numbers(&clean_data);
|
||||||
|
println!("Numbers: {}", numbers);
|
||||||
|
|
||||||
|
// Forbidden words
|
||||||
|
println!("Forbidden words: {}", fw);
|
||||||
|
|
||||||
|
if sentences > 0 {
|
||||||
|
let word_count_per_sentence = words / sentences;
|
||||||
|
println!("Word count per sentence: {}", word_count_per_sentence);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Read in files from args
|
||||||
|
let mut files = Vec::with_capacity(env::args().len());
|
||||||
|
let mut do_parallel = false;
|
||||||
|
|
||||||
|
for arg in env::args().skip(1) { // skip program arg
|
||||||
|
if arg == "-p" {
|
||||||
|
do_parallel = true;
|
||||||
|
} else {
|
||||||
|
files.push(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Do the work
|
||||||
|
let work = |file| {
|
||||||
|
let Ok(text) = fs::read_to_string(&file) else {
|
||||||
|
eprintln!("{file} isn't a valid file or couldn't be read");
|
||||||
|
return;
|
||||||
|
};
|
||||||
|
|
||||||
|
analyze(&text);
|
||||||
|
};
|
||||||
|
|
||||||
|
if !do_parallel {
|
||||||
|
files.iter().for_each(work);
|
||||||
|
} else {
|
||||||
|
files.par_iter().for_each(work)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static FORBIDDEN_WORDS: &'static [&'static str] = &[
|
||||||
|
"recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com",
|
||||||
|
"@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency",
|
||||||
|
"stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century",
|
||||||
|
"transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds"
|
||||||
|
];
|
||||||
828
12bitfloat_rust/risspam/src/main_pre_monoio.rs
Normal file
828
12bitfloat_rust/risspam/src/main_pre_monoio.rs
Normal file
@ -0,0 +1,828 @@
|
|||||||
|
#![feature(likely_unlikely)]
|
||||||
|
|
||||||
|
mod books;
|
||||||
|
|
||||||
|
use crate::books::FULL_BOOK_PATHS;
|
||||||
|
use core_affinity::CoreId;
|
||||||
|
use memmap2::Mmap;
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use std::cell::OnceCell;
|
||||||
|
use std::cell::RefCell;
|
||||||
|
use std::ffi::{OsStr, OsString};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::mem::MaybeUninit;
|
||||||
|
use std::ops::Deref;
|
||||||
|
use std::os::linux::raw::stat;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use std::thread::available_parallelism;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use std::{array, env, fs, hint, mem, process, thread};
|
||||||
|
use std::io::Read;
|
||||||
|
use libc::{aio_read, aiocb};
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ascii_whitespace(b: u8) -> bool {
|
||||||
|
matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ')
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ascii_upper(b: u8) -> bool {
|
||||||
|
matches!(b, b'A'..=b'Z')
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ascii_digit(b: u8) -> bool {
|
||||||
|
matches!(b, b'0'..=b'9')
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(align(128))]
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
struct Stats {
|
||||||
|
pub sentences: u32,
|
||||||
|
pub words: u32,
|
||||||
|
pub capitalizeds: u32,
|
||||||
|
pub numbers: u32,
|
||||||
|
pub forbiddens: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
static TIME_SPENT_READING_FILES: Mutex<Duration> = Mutex::new(Duration::from_secs(0));
|
||||||
|
|
||||||
|
const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024;
|
||||||
|
thread_local! {
|
||||||
|
static WORK_STATE: RefCell<WorkState> = RefCell::new(WorkState::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WorkState {
|
||||||
|
pub work_mem: Box<[u8]>,
|
||||||
|
// pub io_mem: Box<[u8]>,
|
||||||
|
// pub curr_read: Option<aiocb>,
|
||||||
|
// pub had_first_load: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WorkState {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
|
||||||
|
// io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
|
||||||
|
// curr_read: None,
|
||||||
|
// had_first_load: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn work(file_path: &OsStr, stats: &mut Stats) {
|
||||||
|
WORK_STATE.with_borrow_mut(|state: &mut WorkState| {
|
||||||
|
// // Load file
|
||||||
|
// let start_time = Instant::now();
|
||||||
|
|
||||||
|
// let Ok(text) = fs::read(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// process::abort();
|
||||||
|
// };
|
||||||
|
|
||||||
|
let mut file = File::open(file_path).unwrap();
|
||||||
|
let file_len = file.metadata().unwrap().len() as usize;
|
||||||
|
file.read_exact(&mut state.work_mem[..file_len]).unwrap();
|
||||||
|
let text = &state.work_mem[..file_len];
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
let mut cb = mem::zeroed();
|
||||||
|
|
||||||
|
aio_read(&raw mut cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
|
||||||
|
|
||||||
|
// let time_reading = start_time.elapsed();
|
||||||
|
// {
|
||||||
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||||
|
// *guard += time_reading;
|
||||||
|
// }
|
||||||
|
|
||||||
|
analyze(&text, stats);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn analyze(text: &[u8], stats: &mut Stats) {
|
||||||
|
// // NOTE: mmap is quite a bit slower
|
||||||
|
// // Load file
|
||||||
|
// let Ok(file) = File::open(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// std::process::abort();
|
||||||
|
// };
|
||||||
|
// let mmap = unsafe {
|
||||||
|
// Mmap::map(&file).unwrap()
|
||||||
|
// };
|
||||||
|
// mem::forget(file);
|
||||||
|
// let text = &*mmap;
|
||||||
|
|
||||||
|
// // Load file
|
||||||
|
// let start_time = Instant::now();
|
||||||
|
// let Ok(text) = fs::read(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// process::abort();
|
||||||
|
// };
|
||||||
|
// let time_reading = start_time.elapsed();
|
||||||
|
// {
|
||||||
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||||
|
// *guard += time_reading;
|
||||||
|
// }
|
||||||
|
|
||||||
|
let mut sentences = 0;
|
||||||
|
let mut words = 0;
|
||||||
|
let mut capitalizeds = 0;
|
||||||
|
let mut numbers = 0;
|
||||||
|
let mut forbiddens = 0;
|
||||||
|
|
||||||
|
let mut idx = 0;
|
||||||
|
'full_loop: loop {
|
||||||
|
// Skip whitespace
|
||||||
|
while is_ascii_whitespace(text[idx]) {
|
||||||
|
idx += 1;
|
||||||
|
if idx >= text.len() {
|
||||||
|
break 'full_loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find end of word
|
||||||
|
let word_start = idx;
|
||||||
|
let mut has_non_upper = false;
|
||||||
|
|
||||||
|
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
|
||||||
|
idx += 1;
|
||||||
|
if idx >= text.len() {
|
||||||
|
break 'find_word_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-char logic
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
if !is_ascii_upper(b) {
|
||||||
|
has_non_upper = true;
|
||||||
|
}
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let word = &text[word_start..idx];
|
||||||
|
|
||||||
|
// dbg!(str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
if !has_non_upper {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check forbidden
|
||||||
|
if unsafe { FW_TAB.lookup(word) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
||||||
|
if token.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
// Sentence count, folded into this loop
|
||||||
|
// instead of another loop (better cache usage)
|
||||||
|
for &b in token {
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if upper
|
||||||
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
for &b in token {
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if words
|
||||||
|
// if FORBIDDEN_WORDS.contains(&token) {
|
||||||
|
// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
||||||
|
if unsafe { FW_TAB.lookup(token) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
// NOTE: This is pretty slow:
|
||||||
|
let mut idx = 0;
|
||||||
|
let mut word_start = 0;
|
||||||
|
let mut is_in_word = false;
|
||||||
|
let mut has_non_upper = false;
|
||||||
|
loop {
|
||||||
|
let b = unsafe { *text.get_unchecked(idx) };
|
||||||
|
|
||||||
|
let mut process_word = false;
|
||||||
|
if is_ascii_whitespace(b) {
|
||||||
|
if is_in_word {
|
||||||
|
process_word = true;
|
||||||
|
|
||||||
|
// Reset state for next word
|
||||||
|
is_in_word = false;
|
||||||
|
has_non_upper = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !is_in_word {
|
||||||
|
word_start = idx;
|
||||||
|
is_in_word = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
has_non_upper |= !is_ascii_upper(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
// Check sentences
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let word = &text[word_start..idx];
|
||||||
|
|
||||||
|
idx += 1;
|
||||||
|
|
||||||
|
if process_word || idx >= text.len() {
|
||||||
|
words += 1;
|
||||||
|
if !has_non_upper {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// // DEBUG:
|
||||||
|
// println!("'{}'", str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx >= text.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
stats.sentences = sentences;
|
||||||
|
stats.words = words;
|
||||||
|
stats.capitalizeds = capitalizeds;
|
||||||
|
stats.numbers = numbers;
|
||||||
|
stats.forbiddens = forbiddens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
|
||||||
|
// Load file
|
||||||
|
let Ok(text) = fs::read(file_path) else {
|
||||||
|
eprintln!("invalid file!");
|
||||||
|
std::process::abort();
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut sentences = 0;
|
||||||
|
let mut words = 0;
|
||||||
|
let mut capitalizeds = 0;
|
||||||
|
let mut numbers = 0;
|
||||||
|
let mut forbiddens = 0;
|
||||||
|
|
||||||
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
||||||
|
if token.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
// Sentence count, folded into this loop
|
||||||
|
// instead of another loop (better cache usage)
|
||||||
|
for &b in token {
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if upper
|
||||||
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
for &b in token {
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if words
|
||||||
|
// if FORBIDDEN_WORDS.contains(&token) {
|
||||||
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.sentences = sentences;
|
||||||
|
stats.words = words;
|
||||||
|
stats.capitalizeds = capitalizeds;
|
||||||
|
stats.numbers = numbers;
|
||||||
|
stats.forbiddens = forbiddens;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Read in files from args
|
||||||
|
let mut files = Vec::with_capacity(env::args().len());
|
||||||
|
let mut do_parallel = false;
|
||||||
|
|
||||||
|
let start_time = Instant::now();
|
||||||
|
for arg in env::args_os().skip(1) {
|
||||||
|
// skip program arg
|
||||||
|
if arg == "-p" {
|
||||||
|
do_parallel = true;
|
||||||
|
} else {
|
||||||
|
files.push(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("[PROFILE] taking args took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
// env::args_os().
|
||||||
|
|
||||||
|
// let files = FULL_BOOK_PATHS;
|
||||||
|
|
||||||
|
// // Build table
|
||||||
|
// let tab = FwTab::build();
|
||||||
|
// tab.compile();
|
||||||
|
|
||||||
|
// Do the work
|
||||||
|
let mut stats = vec![Stats {
|
||||||
|
sentences: 0,
|
||||||
|
words: 0,
|
||||||
|
capitalizeds: 0,
|
||||||
|
numbers: 0,
|
||||||
|
forbiddens: 0,
|
||||||
|
}; files.len()];
|
||||||
|
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let num_cores = available_parallelism().unwrap().get();
|
||||||
|
let num_threads = num_cores * 1;
|
||||||
|
|
||||||
|
// DEBUG:
|
||||||
|
dbg!(num_threads);
|
||||||
|
dbg!(num_cores);
|
||||||
|
|
||||||
|
rayon::ThreadPoolBuilder::new()
|
||||||
|
.num_threads(num_threads)
|
||||||
|
.build_global()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
files.par_iter()
|
||||||
|
.enumerate()
|
||||||
|
.for_each(|(idx, p)| {
|
||||||
|
let s = unsafe {
|
||||||
|
&mut *stats.as_ptr()
|
||||||
|
.offset(idx as isize)
|
||||||
|
.cast_mut()
|
||||||
|
};
|
||||||
|
|
||||||
|
// let mut path = OsString::from("../../");
|
||||||
|
// path.push(p);
|
||||||
|
let path = p;
|
||||||
|
work(path, s);
|
||||||
|
});
|
||||||
|
|
||||||
|
// thread::scope(|scope| {
|
||||||
|
// let files_per_thread = files.len() / num_threads;
|
||||||
|
//
|
||||||
|
// for thread_idx in 0..num_threads {
|
||||||
|
// let capture_files = &files;
|
||||||
|
// let capture_stats = &stats;
|
||||||
|
// thread::Builder::new().spawn_scoped(scope, move || {
|
||||||
|
// let files = capture_files;
|
||||||
|
// let stats = capture_stats;
|
||||||
|
//
|
||||||
|
// // Set thread affinity
|
||||||
|
// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
|
||||||
|
//
|
||||||
|
// // Do work
|
||||||
|
// let thread_start = thread_idx * files_per_thread;
|
||||||
|
// for i in 0..files_per_thread {
|
||||||
|
// let real_idx = thread_start + i;
|
||||||
|
// let file_path = &files[real_idx];
|
||||||
|
// let st = unsafe {
|
||||||
|
// &mut *stats.as_ptr()
|
||||||
|
// .offset(real_idx as isize)
|
||||||
|
// .cast_mut()
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// work(&file_path, st);
|
||||||
|
// }
|
||||||
|
// }).unwrap();
|
||||||
|
// }
|
||||||
|
// });
|
||||||
|
|
||||||
|
println!("[PROFILE] processing text took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
// Accumulate stats
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let mut total_words = 0;
|
||||||
|
let mut total_capitalizeds = 0;
|
||||||
|
let mut total_sentences = 0;
|
||||||
|
let mut total_numbers = 0;
|
||||||
|
let mut total_forbiddens = 0;
|
||||||
|
|
||||||
|
for stat in &stats {
|
||||||
|
total_words += stat.words;
|
||||||
|
total_capitalizeds += stat.capitalizeds;
|
||||||
|
total_sentences += stat.sentences;
|
||||||
|
total_numbers += stat.numbers;
|
||||||
|
total_forbiddens += stat.forbiddens;
|
||||||
|
}
|
||||||
|
|
||||||
|
let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0;
|
||||||
|
let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0;
|
||||||
|
let word_count_per_sentence = total_words as f32 / total_sentences as f32;
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Total Words: {total_words}");
|
||||||
|
println!("Total Capitalized words: {total_capitalizeds}");
|
||||||
|
println!("Total Sentences: {total_sentences}");
|
||||||
|
println!("Total Numbers: {total_numbers}");
|
||||||
|
println!("Total Forbidden words: {total_forbiddens}");
|
||||||
|
println!("Capitalized percentage: {capitalized_percentage:.6}");
|
||||||
|
println!("Forbidden percentage: {forbidden_percentage:.6}");
|
||||||
|
println!("Word count per sentence: {word_count_per_sentence:.6}");
|
||||||
|
println!("Total files read: {}", files.len());
|
||||||
|
|
||||||
|
println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap());
|
||||||
|
|
||||||
|
// Exit process to avoid running drops
|
||||||
|
process::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(C)]
|
||||||
|
struct FwTab {
|
||||||
|
// pub dir_and_len_bits: [u32; 256],
|
||||||
|
pub dir_len_bits: [u16; 256],
|
||||||
|
pub dir: [u8; 256],
|
||||||
|
pub strs: [u8; 256],
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FwTab {
|
||||||
|
pub fn build() -> Self {
|
||||||
|
// Sort fws by first char
|
||||||
|
let mut sorted_fws: Vec<Vec<&'static [u8]>> = vec![vec![]; 256];
|
||||||
|
|
||||||
|
for word in FORBIDDEN_WORDS {
|
||||||
|
sorted_fws[word[0] as usize].push(&word);
|
||||||
|
}
|
||||||
|
for i in 0..256 {
|
||||||
|
sorted_fws[i].sort()
|
||||||
|
}
|
||||||
|
|
||||||
|
// // DEBUG:
|
||||||
|
// println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
|
||||||
|
|
||||||
|
// Build str tab
|
||||||
|
let mut fw_dir = [0u8; 256];
|
||||||
|
// let mut fw_dir_len_bits = [0u32; 256];
|
||||||
|
let mut fw_dir_len_bits = [0u16; 256];
|
||||||
|
let mut fw_strs: Vec<u8> = vec![];
|
||||||
|
|
||||||
|
fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries
|
||||||
|
|
||||||
|
for c in 0..256 {
|
||||||
|
for fw in FORBIDDEN_WORDS {
|
||||||
|
if c == fw[0] as usize {
|
||||||
|
fw_dir_len_bits[c] |= 0x1 << fw.len();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !sorted_fws[c].is_empty() {
|
||||||
|
let sublist_start_offset = fw_strs.len().try_into().unwrap();
|
||||||
|
fw_dir[c] = sublist_start_offset;
|
||||||
|
|
||||||
|
// DEBUG:
|
||||||
|
println!("{c} start offset: {}", sublist_start_offset);
|
||||||
|
println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());
|
||||||
|
|
||||||
|
// Push strings
|
||||||
|
for fw in &sorted_fws[c] {
|
||||||
|
fw_strs.push(fw.len().try_into().unwrap());
|
||||||
|
for &c in &fw[1..] {
|
||||||
|
fw_strs.push(c);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Mark end of per-char word sublist
|
||||||
|
fw_strs.push(b'\0');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// DEBUG:
|
||||||
|
println!("strs len: {}", fw_strs.len());
|
||||||
|
|
||||||
|
assert_eq!(fw_dir.len(), 256);
|
||||||
|
assert!(fw_strs.len() <= 256);
|
||||||
|
|
||||||
|
fw_strs.resize(256, 0);
|
||||||
|
|
||||||
|
let tab = FwTab {
|
||||||
|
dir: fw_dir,
|
||||||
|
dir_len_bits: fw_dir_len_bits,
|
||||||
|
// dir_and_len_bits: array::from_fn(|idx| {
|
||||||
|
// (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24)
|
||||||
|
// }),
|
||||||
|
strs: fw_strs.try_into().unwrap(),
|
||||||
|
};
|
||||||
|
|
||||||
|
// DEBUG: Test some strings
|
||||||
|
unsafe {
|
||||||
|
dbg!(tab.lookup(b"cpm"));
|
||||||
|
dbg!(tab.lookup(b"com"));
|
||||||
|
dbg!(tab.lookup(b"coma"));
|
||||||
|
dbg!(tab.lookup(b"co"));
|
||||||
|
dbg!(tab.lookup(b"cam"));
|
||||||
|
dbg!(tab.lookup(b"crypto"));
|
||||||
|
dbg!(tab.lookup(b"@"));
|
||||||
|
dbg!(tab.lookup(b""));
|
||||||
|
dbg!(tab.lookup(b" "));
|
||||||
|
dbg!(tab.lookup(b"test"));
|
||||||
|
dbg!(tab.lookup(b"expers"));
|
||||||
|
}
|
||||||
|
|
||||||
|
tab
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn compile(&self) {
|
||||||
|
println!("static FW_TAB: FwTab = FwTab {{");
|
||||||
|
|
||||||
|
// println!("\tdir_and_len_bits: [");
|
||||||
|
// for chunk in self.dir_and_len_bits.chunks(16) {
|
||||||
|
// print!("\t\t");
|
||||||
|
// for &b in chunk {
|
||||||
|
// print!("0x{b:08x}, ");
|
||||||
|
// }
|
||||||
|
// println!();
|
||||||
|
// }
|
||||||
|
// println!("\t],");
|
||||||
|
|
||||||
|
println!("\tdir: [");
|
||||||
|
for chunk in self.dir.chunks(16) {
|
||||||
|
print!("\t\t");
|
||||||
|
for &b in chunk {
|
||||||
|
print!("0x{b:02x}, ");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
println!("\t],");
|
||||||
|
|
||||||
|
println!("\tdir_len_bits: [");
|
||||||
|
for chunk in self.dir_len_bits.chunks(16) {
|
||||||
|
print!("\t\t");
|
||||||
|
for &b in chunk {
|
||||||
|
print!("0x{b:04x}, ");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
println!("\t],");
|
||||||
|
|
||||||
|
println!("\tstrs: [");
|
||||||
|
for chunk in self.strs.chunks(16) {
|
||||||
|
print!("\t\t");
|
||||||
|
for &b in chunk {
|
||||||
|
print!("0x{b:02x}, ");
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
}
|
||||||
|
println!("\t],");
|
||||||
|
|
||||||
|
println!("}};");
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
pub unsafe fn lookup(&self, word: &[u8]) -> bool {
|
||||||
|
let &[first_char, ..] = word else {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
|
||||||
|
// let dir_and_len_bits = unsafe {
|
||||||
|
// *self.dir_and_len_bits.get_unchecked(first_char as usize)
|
||||||
|
// };
|
||||||
|
// if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 {
|
||||||
|
// return false;
|
||||||
|
// }
|
||||||
|
|
||||||
|
let len_bits = unsafe {
|
||||||
|
*self.dir_len_bits.get_unchecked(first_char as usize)
|
||||||
|
};
|
||||||
|
if word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// let mut str_offset = (dir_and_len_bits >> 24) as usize;
|
||||||
|
let mut str_offset = unsafe {
|
||||||
|
*self.dir.get_unchecked(first_char as usize) as usize
|
||||||
|
};
|
||||||
|
|
||||||
|
// Char doesn't have any strings in the table
|
||||||
|
if str_offset == 0 {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over strs
|
||||||
|
loop {
|
||||||
|
// let fw_len = u16::from_le_bytes([
|
||||||
|
// self.strs[str_offset],
|
||||||
|
// self.strs[str_offset+1]
|
||||||
|
// ]);
|
||||||
|
let fw_len: u8 = unsafe {
|
||||||
|
*self.strs.get_unchecked(str_offset)
|
||||||
|
};
|
||||||
|
|
||||||
|
if fw_len == 0 {
|
||||||
|
// We've reached the end of the word sublist
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only compare words if they are the same length
|
||||||
|
if word.len() == fw_len as usize {
|
||||||
|
// Compare strs
|
||||||
|
let mut char_offset = 1usize;
|
||||||
|
loop {
|
||||||
|
// Found the word!
|
||||||
|
if char_offset == word.len() {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) };
|
||||||
|
let word_char = unsafe { *word.get_unchecked(char_offset) };
|
||||||
|
|
||||||
|
if fw_char > word_char {
|
||||||
|
// Word can't possible be in the sorted list, return
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if fw_char < word_char {
|
||||||
|
// Try next word
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
char_offset += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Advance to next word
|
||||||
|
// let str_len_bytes = 2;
|
||||||
|
let str_len_bytes = 1;
|
||||||
|
str_offset += (fw_len as usize - 1) + str_len_bytes;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const FORBIDDEN_WORDS: [&'static [u8]; 35] = [
|
||||||
|
b"recovery",
|
||||||
|
b"techie",
|
||||||
|
b"http",
|
||||||
|
b"https",
|
||||||
|
b"digital",
|
||||||
|
b"hack",
|
||||||
|
b"::",
|
||||||
|
b"//",
|
||||||
|
b"com",
|
||||||
|
b"@",
|
||||||
|
b"crypto",
|
||||||
|
b"bitcoin",
|
||||||
|
b"wallet",
|
||||||
|
b"hacker",
|
||||||
|
b"welcome",
|
||||||
|
b"whatsapp",
|
||||||
|
b"email",
|
||||||
|
b"cryptocurrency",
|
||||||
|
b"stolen",
|
||||||
|
b"freeze",
|
||||||
|
b"quick",
|
||||||
|
b"crucial",
|
||||||
|
b"tracing",
|
||||||
|
b"scammers",
|
||||||
|
b"expers",
|
||||||
|
b"hire",
|
||||||
|
b"century",
|
||||||
|
b"transaction",
|
||||||
|
b"essential",
|
||||||
|
b"managing",
|
||||||
|
b"contact",
|
||||||
|
b"contacting",
|
||||||
|
b"understanding",
|
||||||
|
b"assets",
|
||||||
|
b"funds",
|
||||||
|
];
|
||||||
|
|
||||||
|
static FW_TAB: FwTab = FwTab {
|
||||||
|
dir: [
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
|
||||||
|
0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
],
|
||||||
|
dir_len_bits: [
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
|
||||||
|
],
|
||||||
|
strs: [
|
||||||
|
0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
|
||||||
|
0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
|
||||||
|
0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
|
||||||
|
0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
|
||||||
|
0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
|
||||||
|
0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
|
||||||
|
0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
|
||||||
|
0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
|
||||||
|
0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
|
||||||
|
0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
|
||||||
|
0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
|
||||||
|
0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
|
||||||
|
0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
|
||||||
|
0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
|
||||||
|
0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
|
||||||
|
0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
],
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
//static FW_TAB_DIR: [u8; 256] = [
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
|
||||||
|
// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
//];
|
||||||
|
//static FW_TAB_STRS: [u8; 244] = [
|
||||||
|
// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
|
||||||
|
// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
|
||||||
|
// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
|
||||||
|
// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
|
||||||
|
// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
|
||||||
|
// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
|
||||||
|
// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
|
||||||
|
// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
|
||||||
|
// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
|
||||||
|
// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
|
||||||
|
// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
|
||||||
|
// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
|
||||||
|
// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
|
||||||
|
// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
|
||||||
|
// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
|
||||||
|
// 0x61, 0x70, 0x70, 0x00,
|
||||||
|
//];
|
||||||
891
12bitfloat_rust/risspam/src/main_pre_simd_cmp.rs
Normal file
891
12bitfloat_rust/risspam/src/main_pre_simd_cmp.rs
Normal file
@ -0,0 +1,891 @@
|
|||||||
|
#![feature(likely_unlikely)]
|
||||||
|
#![feature(rust_cold_cc)]
|
||||||
|
|
||||||
|
mod books;
|
||||||
|
|
||||||
|
use crate::books::FULL_BOOK_PATHS;
|
||||||
|
use core_affinity::CoreId;
|
||||||
|
use libc::{aio_read, aiocb, read};
|
||||||
|
use memmap2::Mmap;
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use std::cell::OnceCell;
|
||||||
|
use std::cell::RefCell;
|
||||||
|
use std::ffi::{OsStr, OsString};
|
||||||
|
use std::fs::{File, OpenOptions};
|
||||||
|
use std::io::Read;
|
||||||
|
use std::mem::MaybeUninit;
|
||||||
|
use std::ops::Deref;
|
||||||
|
use std::os::linux::raw::stat;
|
||||||
|
use std::sync::Mutex;
|
||||||
|
use std::thread::available_parallelism;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use std::{array, env, fs, hint, mem, process, thread};
|
||||||
|
use std::hint::assert_unchecked;
|
||||||
|
use std::os::unix::fs::{FileExt, OpenOptionsExt};
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ascii_whitespace(b: u8) -> bool {
|
||||||
|
matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ')
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ascii_upper(b: u8) -> bool {
|
||||||
|
matches!(b, b'A'..=b'Z')
|
||||||
|
}
|
||||||
|
|
||||||
|
#[inline]
|
||||||
|
fn is_ascii_digit(b: u8) -> bool {
|
||||||
|
matches!(b, b'0'..=b'9')
|
||||||
|
}
|
||||||
|
|
||||||
|
#[repr(align(128))]
|
||||||
|
#[derive(Copy, Clone)]
|
||||||
|
struct Stats {
|
||||||
|
pub sentences: u32,
|
||||||
|
pub words: u32,
|
||||||
|
pub capitalizeds: u32,
|
||||||
|
pub numbers: u32,
|
||||||
|
pub forbiddens: u32,
|
||||||
|
}
|
||||||
|
|
||||||
|
static TIME_SPENT_READING_FILES: Mutex<Duration> = Mutex::new(Duration::from_secs(0));
|
||||||
|
|
||||||
|
const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024;
|
||||||
|
thread_local! {
|
||||||
|
static WORK_STATE: RefCell<WorkState> = RefCell::new(WorkState::new());
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct WorkState {
|
||||||
|
pub work_mem: Box<[u8]>,
|
||||||
|
// pub io_mem: Box<[u8]>,
|
||||||
|
// pub curr_read: Option<aiocb>,
|
||||||
|
// pub had_first_load: bool,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl WorkState {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
|
||||||
|
// io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
|
||||||
|
// curr_read: None,
|
||||||
|
// had_first_load: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cold]
|
||||||
|
#[inline(never)]
|
||||||
|
extern "rust-cold" fn die() -> ! {
|
||||||
|
println!("Something went wrong! I'm going to die now");
|
||||||
|
process::abort()
|
||||||
|
}
|
||||||
|
|
||||||
|
fn work(file_path: &OsStr, stats: &mut Stats) {
|
||||||
|
WORK_STATE.with_borrow_mut(|state: &mut WorkState| {
|
||||||
|
// // Load file
|
||||||
|
// let start_time = Instant::now();
|
||||||
|
|
||||||
|
// let Ok(text) = fs::read(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// process::abort();
|
||||||
|
// };
|
||||||
|
|
||||||
|
// NOTE: Reading the file like this is noticeably faster!
|
||||||
|
let mut file = OpenOptions::new()
|
||||||
|
.read(true)
|
||||||
|
// .custom_flags(libc::O_DIRECT) // O_DIRECT is A LOT slower!!
|
||||||
|
.open(file_path)
|
||||||
|
.unwrap_or_else(|_| die());
|
||||||
|
|
||||||
|
let mut read_offset = 0;
|
||||||
|
loop {
|
||||||
|
// let rb = file.read_at(&mut state.work_mem[read_offset..], read_offset as u64)
|
||||||
|
let rb = file.read(&mut state.work_mem[read_offset..])
|
||||||
|
.unwrap_or_else(|_| die());
|
||||||
|
|
||||||
|
if hint::unlikely(rb == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
read_offset += rb;
|
||||||
|
}
|
||||||
|
let text = &state.work_mem[..read_offset];
|
||||||
|
|
||||||
|
// file.read_exact(&mut state.work_mem[..file_len]).unwrap();
|
||||||
|
|
||||||
|
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
|
||||||
|
|
||||||
|
// let time_reading = start_time.elapsed();
|
||||||
|
// {
|
||||||
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||||
|
// *guard += time_reading;
|
||||||
|
// }
|
||||||
|
|
||||||
|
analyze(&text, stats);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn analyze(text: &[u8], stats: &mut Stats) {
|
||||||
|
// // NOTE: mmap is quite a bit slower
|
||||||
|
// // Load file
|
||||||
|
// let Ok(file) = File::open(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// std::process::abort();
|
||||||
|
// };
|
||||||
|
// let mmap = unsafe {
|
||||||
|
// Mmap::map(&file).unwrap()
|
||||||
|
// };
|
||||||
|
// mem::forget(file);
|
||||||
|
// let text = &*mmap;
|
||||||
|
|
||||||
|
// // Load file
|
||||||
|
// let start_time = Instant::now();
|
||||||
|
// let Ok(text) = fs::read(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// process::abort();
|
||||||
|
// };
|
||||||
|
// let time_reading = start_time.elapsed();
|
||||||
|
// {
|
||||||
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||||
|
// *guard += time_reading;
|
||||||
|
// }
|
||||||
|
|
||||||
|
let mut sentences = 0;
|
||||||
|
let mut words = 0;
|
||||||
|
let mut capitalizeds = 0;
|
||||||
|
let mut numbers = 0;
|
||||||
|
let mut forbiddens = 0;
|
||||||
|
|
||||||
|
let mut idx = 0;
|
||||||
|
'full_loop: loop {
|
||||||
|
// Skip whitespace
|
||||||
|
while is_ascii_whitespace(text[idx]) {
|
||||||
|
idx += 1;
|
||||||
|
if hint::unlikely(idx >= text.len()) {
|
||||||
|
break 'full_loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find end of word
|
||||||
|
let word_start = idx;
|
||||||
|
let mut has_non_upper = false;
|
||||||
|
|
||||||
|
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
|
||||||
|
idx += 1;
|
||||||
|
if hint::unlikely(idx >= text.len()) {
|
||||||
|
break 'find_word_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-char logic
|
||||||
|
if !is_ascii_upper(b) {
|
||||||
|
has_non_upper = true;
|
||||||
|
}
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
// sentences += (b == b'.') as u32;
|
||||||
|
// numbers += is_ascii_digit(b) as u32;
|
||||||
|
}
|
||||||
|
|
||||||
|
let word = &text[word_start..idx];
|
||||||
|
|
||||||
|
// dbg!(str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
if !has_non_upper {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check forbidden
|
||||||
|
if unsafe { FW_TAB.lookup(word) } {
|
||||||
|
// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
||||||
|
if token.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
// Sentence count, folded into this loop
|
||||||
|
// instead of another loop (better cache usage)
|
||||||
|
for &b in token {
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if upper
|
||||||
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
for &b in token {
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if words
|
||||||
|
// if FORBIDDEN_WORDS.contains(&token) {
|
||||||
|
// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
||||||
|
if unsafe { FW_TAB.lookup(token) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
// NOTE: This is pretty slow:
|
||||||
|
let mut idx = 0;
|
||||||
|
let mut word_start = 0;
|
||||||
|
let mut is_in_word = false;
|
||||||
|
let mut has_non_upper = false;
|
||||||
|
loop {
|
||||||
|
let b = unsafe { *text.get_unchecked(idx) };
|
||||||
|
|
||||||
|
let mut process_word = false;
|
||||||
|
if is_ascii_whitespace(b) {
|
||||||
|
if is_in_word {
|
||||||
|
process_word = true;
|
||||||
|
|
||||||
|
// Reset state for next word
|
||||||
|
is_in_word = false;
|
||||||
|
has_non_upper = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !is_in_word {
|
||||||
|
word_start = idx;
|
||||||
|
is_in_word = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
has_non_upper |= !is_ascii_upper(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
// Check sentences
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let word = &text[word_start..idx];
|
||||||
|
|
||||||
|
idx += 1;
|
||||||
|
|
||||||
|
if process_word || idx >= text.len() {
|
||||||
|
words += 1;
|
||||||
|
if !has_non_upper {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// // DEBUG:
|
||||||
|
// println!("'{}'", str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx >= text.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
stats.sentences = sentences;
|
||||||
|
stats.words = words;
|
||||||
|
stats.capitalizeds = capitalizeds;
|
||||||
|
stats.numbers = numbers;
|
||||||
|
stats.forbiddens = forbiddens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
|
||||||
|
// Load file
|
||||||
|
let Ok(text) = fs::read(file_path) else {
|
||||||
|
eprintln!("invalid file!");
|
||||||
|
std::process::abort();
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut sentences = 0;
|
||||||
|
let mut words = 0;
|
||||||
|
let mut capitalizeds = 0;
|
||||||
|
let mut numbers = 0;
|
||||||
|
let mut forbiddens = 0;
|
||||||
|
|
||||||
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
||||||
|
if token.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
// Sentence count, folded into this loop
|
||||||
|
// instead of another loop (better cache usage)
|
||||||
|
for &b in token {
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if upper
|
||||||
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
for &b in token {
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if words
|
||||||
|
// if FORBIDDEN_WORDS.contains(&token) {
|
||||||
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.sentences = sentences;
|
||||||
|
stats.words = words;
|
||||||
|
stats.capitalizeds = capitalizeds;
|
||||||
|
stats.numbers = numbers;
|
||||||
|
stats.forbiddens = forbiddens;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Read in files from args
|
||||||
|
let mut files = Vec::with_capacity(env::args().len());
|
||||||
|
let mut do_parallel = false;
|
||||||
|
|
||||||
|
let start_time = Instant::now();
|
||||||
|
for arg in env::args_os().skip(1) {
|
||||||
|
// skip program arg
|
||||||
|
if arg == "-p" {
|
||||||
|
do_parallel = true;
|
||||||
|
} else {
|
||||||
|
files.push(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("[PROFILE] taking args took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
// env::args_os().
|
||||||
|
|
||||||
|
// let files = FULL_BOOK_PATHS;
|
||||||
|
|
||||||
|
// // Build table
|
||||||
|
// let tab = FwTab::build();
|
||||||
|
// tab.compile();
|
||||||
|
|
||||||
|
// Do the work
|
||||||
|
let mut stats = vec![Stats {
|
||||||
|
sentences: 0,
|
||||||
|
words: 0,
|
||||||
|
capitalizeds: 0,
|
||||||
|
numbers: 0,
|
||||||
|
forbiddens: 0,
|
||||||
|
}; files.len()];
|
||||||
|
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let num_cores = available_parallelism().unwrap().get();
|
||||||
|
let num_threads = num_cores * 1;
|
||||||
|
|
||||||
|
// // DEBUG:
|
||||||
|
// dbg!(num_threads);
|
||||||
|
// dbg!(num_cores);
|
||||||
|
|
||||||
|
rayon::ThreadPoolBuilder::new()
|
||||||
|
.num_threads(num_threads)
|
||||||
|
.build_global()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
files.par_iter()
|
||||||
|
.enumerate()
|
||||||
|
.for_each(|(idx, p)| {
|
||||||
|
let s = unsafe {
|
||||||
|
&mut *stats.as_ptr()
|
||||||
|
.offset(idx as isize)
|
||||||
|
.cast_mut()
|
||||||
|
};
|
||||||
|
|
||||||
|
// let mut path = OsString::from("../../");
|
||||||
|
// path.push(p);
|
||||||
|
let path = p;
|
||||||
|
work(path, s);
|
||||||
|
});
|
||||||
|
|
||||||
|
// thread::scope(|scope| {
|
||||||
|
// let files_per_thread = files.len() / num_threads;
|
||||||
|
//
|
||||||
|
// for thread_idx in 0..num_threads {
|
||||||
|
// let capture_files = &files;
|
||||||
|
// let capture_stats = &stats;
|
||||||
|
// thread::Builder::new().spawn_scoped(scope, move || {
|
||||||
|
// let files = capture_files;
|
||||||
|
// let stats = capture_stats;
|
||||||
|
//
|
||||||
|
// // Set thread affinity
|
||||||
|
// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
|
||||||
|
//
|
||||||
|
// // Do work
|
||||||
|
// let thread_start = thread_idx * files_per_thread;
|
||||||
|
// for i in 0..files_per_thread {
|
||||||
|
// let real_idx = thread_start + i;
|
||||||
|
// let file_path = &files[real_idx];
|
||||||
|
// let st = unsafe {
|
||||||
|
// &mut *stats.as_ptr()
|
||||||
|
// .offset(real_idx as isize)
|
||||||
|
// .cast_mut()
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// work(&file_path, st);
|
||||||
|
// }
|
||||||
|
// }).unwrap();
|
||||||
|
// }
|
||||||
|
// });
|
||||||
|
|
||||||
|
println!("[PROFILE] processing text took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
// Accumulate stats
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let mut total_words = 0;
|
||||||
|
let mut total_capitalizeds = 0;
|
||||||
|
let mut total_sentences = 0;
|
||||||
|
let mut total_numbers = 0;
|
||||||
|
let mut total_forbiddens = 0;
|
||||||
|
|
||||||
|
for stat in &stats {
|
||||||
|
total_words += stat.words;
|
||||||
|
total_capitalizeds += stat.capitalizeds;
|
||||||
|
total_sentences += stat.sentences;
|
||||||
|
total_numbers += stat.numbers;
|
||||||
|
total_forbiddens += stat.forbiddens;
|
||||||
|
}
|
||||||
|
|
||||||
|
let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0;
|
||||||
|
let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0;
|
||||||
|
let word_count_per_sentence = total_words as f32 / total_sentences as f32;
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Total Words: {total_words}");
|
||||||
|
println!("Total Capitalized words: {total_capitalizeds}");
|
||||||
|
println!("Total Sentences: {total_sentences}");
|
||||||
|
println!("Total Numbers: {total_numbers}");
|
||||||
|
println!("Total Forbidden words: {total_forbiddens}");
|
||||||
|
println!("Capitalized percentage: {capitalized_percentage:.6}");
|
||||||
|
println!("Forbidden percentage: {forbidden_percentage:.6}");
|
||||||
|
println!("Word count per sentence: {word_count_per_sentence:.6}");
|
||||||
|
println!("Total files read: {}", files.len());
|
||||||
|
|
||||||
|
println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap());
|
||||||
|
|
||||||
|
// Exit process to avoid running drops
|
||||||
|
process::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// First-byte-indexed lookup table for the forbidden-word list.
///
/// Layout (established by `FwTab::build`):
/// - `dir[c]` is the byte offset into `strs` where the sublist of words whose
///   first byte is `c` begins; 0 means "no words start with `c`" (offset 0 is
///   occupied by a dummy byte).
/// - `dir_len_bits[c]` is a bitmask of the word lengths present for first
///   byte `c` (bit n set = some forbidden word of length n), used by `lookup`
///   as a fast reject.
/// - `strs` stores each sublist as consecutive records of
///   `[total word length][word bytes after the first char]`, terminated by a
///   0 length byte.
#[repr(C)]
struct FwTab {
    // pub dir_and_len_bits: [u32; 256],
    pub dir_len_bits: [u16; 256], // per-first-byte bitmask of word lengths
    pub dir: [u8; 256],           // per-first-byte start offset into `strs`
    pub strs: [u8; 256],          // packed word records (see struct docs)
}
|
||||||
|
|
||||||
|
impl FwTab {
    /// Builds the lookup table from `FORBIDDEN_WORDS`.
    ///
    /// This is a dev-time generator: its result is meant to be dumped with
    /// `compile` and pasted into the source as `static FW_TAB`.
    /// NOTE(review): leftover debug `println!`/`dbg!` output is still active
    /// in here; tolerated because this never runs in the hot path.
    pub fn build() -> Self {
        // Bucket forbidden words by first byte.
        let mut sorted_fws: Vec<Vec<&'static [u8]>> = vec![vec![]; 256];

        for word in FORBIDDEN_WORDS {
            sorted_fws[word[0] as usize].push(&word);
        }
        for i in 0..256 {
            // Sublists must be lexicographically sorted: `lookup` early-outs
            // as soon as a table word compares greater than the probe word.
            sorted_fws[i].sort()
        }

        // Build str tab
        let mut fw_dir = [0u8; 256];
        // let mut fw_dir_len_bits = [0u32; 256];
        let mut fw_dir_len_bits = [0u16; 256];
        let mut fw_strs: Vec<u8> = vec![];

        fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries

        for c in 0..256 {
            // Bit n of the mask marks that some forbidden word starting with
            // byte `c` has length n.
            // NOTE(review): `0x1 << fw.len()` overflows the u16 for words of
            // length >= 16; the current maximum word length is 14.
            for fw in FORBIDDEN_WORDS {
                if c == fw[0] as usize {
                    fw_dir_len_bits[c] |= 0x1 << fw.len();
                }
            }

            if !sorted_fws[c].is_empty() {
                let sublist_start_offset = fw_strs.len().try_into().unwrap();
                fw_dir[c] = sublist_start_offset;

                // DEBUG:
                println!("{c} start offset: {}", sublist_start_offset);
                println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());

                // Push string records: [total length][bytes after the first
                // char] — the first char is implied by the dir index.
                for fw in &sorted_fws[c] {
                    fw_strs.push(fw.len().try_into().unwrap());
                    for &c in &fw[1..] {
                        fw_strs.push(c);
                    }
                }

                // Mark end of per-char word sublist
                fw_strs.push(b'\0');
            }
        }

        // DEBUG:
        println!("strs len: {}", fw_strs.len());

        assert_eq!(fw_dir.len(), 256);
        // Offsets are stored as u8, so the whole string table must fit in 256 bytes.
        assert!(fw_strs.len() <= 256);

        fw_strs.resize(256, 0);

        let tab = FwTab {
            dir: fw_dir,
            dir_len_bits: fw_dir_len_bits,
            // dir_and_len_bits: array::from_fn(|idx| {
            //     (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24)
            // }),
            strs: fw_strs.try_into().unwrap(),
        };

        // DEBUG: Test some strings.
        // NOTE(review): `tab.lookup(b"")` probes an empty slice, but `lookup`
        // reads word[0] with get_unchecked — that call is UB and should be
        // removed or guarded.
        unsafe {
            dbg!(tab.lookup(b"cpm"));
            dbg!(tab.lookup(b"com"));
            dbg!(tab.lookup(b"coma"));
            dbg!(tab.lookup(b"co"));
            dbg!(tab.lookup(b"cam"));
            dbg!(tab.lookup(b"crypto"));
            dbg!(tab.lookup(b"@"));
            dbg!(tab.lookup(b""));
            dbg!(tab.lookup(b" "));
            dbg!(tab.lookup(b"test"));
            dbg!(tab.lookup(b"expers"));
        }

        tab
    }

    /// Dumps `self` to stdout as Rust source for a `static FW_TAB: FwTab`
    /// initializer, so a built table can be pasted into this file.
    pub fn compile(&self) {
        println!("static FW_TAB: FwTab = FwTab {{");

        // println!("\tdir_and_len_bits: [");
        // for chunk in self.dir_and_len_bits.chunks(16) {
        //     print!("\t\t");
        //     for &b in chunk {
        //         print!("0x{b:08x}, ");
        //     }
        //     println!();
        // }
        // println!("\t],");

        println!("\tdir: [");
        for chunk in self.dir.chunks(16) {
            print!("\t\t");
            for &b in chunk {
                print!("0x{b:02x}, ");
            }
            println!();
        }
        println!("\t],");

        println!("\tdir_len_bits: [");
        for chunk in self.dir_len_bits.chunks(16) {
            print!("\t\t");
            for &b in chunk {
                print!("0x{b:04x}, ");
            }
            println!();
        }
        println!("\t],");

        println!("\tstrs: [");
        for chunk in self.strs.chunks(16) {
            print!("\t\t");
            for &b in chunk {
                print!("0x{b:02x}, ");
            }
            println!();
        }
        println!("\t],");

        println!("}};");
    }

    /// Returns true if `word` is present in the forbidden-word table.
    ///
    /// # Safety
    /// `word` must be non-empty: its first byte is read without a bounds check.
    #[inline]
    pub unsafe fn lookup(&self, word: &[u8]) -> bool {
        // let &[first_char, ..] = word else {
        //     return false;
        // };
        let first_char = unsafe { *word.get_unchecked(0) };

        // Fast reject: no table word with this first byte has this length.
        // (Words of length >= 16 fall through to the full scan.)
        let len_bits = unsafe {
            *self.dir_len_bits.get_unchecked(first_char as usize)
        };
        if hint::likely(word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0) {
            return false;
        }

        // let mut str_offset = (dir_and_len_bits >> 24) as usize;
        let mut str_offset = unsafe {
            *self.dir.get_unchecked(first_char as usize) as usize
        };

        // Char doesn't have any strings in the table
        if str_offset == 0 {
            return false;
        }

        // Iterate over the records ([len][bytes after first char], 0-terminated).
        loop {
            let fw_len: u8 = unsafe {
                *self.strs.get_unchecked(str_offset)
            };

            if fw_len == 0 {
                // We've reached the end of the word sublist
                return false;
            }

            // Only compare words if they are the same length
            if hint::unlikely(word.len() == fw_len as usize) {
                // Compare bytes starting at offset 1: the first char already
                // matched by construction of the dir index.
                let mut char_offset = 1usize;
                loop {
                    // Found the word!
                    if char_offset == word.len() {
                        return true;
                    }

                    let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) };
                    let word_char = unsafe { *word.get_unchecked(char_offset) };

                    if fw_char > word_char {
                        // Sublist is sorted, so the word can't appear later — give up.
                        return false;
                    }
                    if fw_char < word_char {
                        // Try next word
                        break;
                    }

                    char_offset += 1;
                }
            }

            // Advance to next record: (len - 1) payload bytes plus 1 length byte.
            // let str_len_bytes = 2;
            let str_len_bytes = 1;
            str_offset += (fw_len as usize - 1) + str_len_bytes;
        }
    }
}
|
||||||
|
|
||||||
|
// Spam-indicator word list matched (as byte strings, case-sensitive) against
// whitespace-separated tokens. Source of truth for both `FW_TAB` (the
// precomputed lookup table below) and `FW_PHF`; regenerate `FW_TAB` via
// `FwTab::build()` + `compile()` after editing this list.
const FORBIDDEN_WORDS: [&'static [u8]; 35] = [
    b"recovery",
    b"techie",
    b"http",
    b"https",
    b"digital",
    b"hack",
    b"::",
    b"//",
    b"com",
    b"@",
    b"crypto",
    b"bitcoin",
    b"wallet",
    b"hacker",
    b"welcome",
    b"whatsapp",
    b"email",
    b"cryptocurrency",
    b"stolen",
    b"freeze",
    b"quick",
    b"crucial",
    b"tracing",
    b"scammers",
    b"expers", // NOTE(review): possible typo for "experts" — confirm it is intentional
    b"hire",
    b"century",
    b"transaction",
    b"essential",
    b"managing",
    b"contact",
    b"contacting",
    b"understanding",
    b"assets",
    b"funds",
];
|
||||||
|
|
||||||
|
// Precomputed FwTab for FORBIDDEN_WORDS: this is the literal output of
// `FwTab::build()` as printed by `FwTab::compile()`, pasted in so the table
// needs no runtime construction. Do not edit by hand — regenerate with
// build()/compile() whenever FORBIDDEN_WORDS changes.
static FW_TAB: FwTab = FwTab {
    dir: [
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
        0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ],
    dir_len_bits: [
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000,
        0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    ],
    strs: [
        0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
        0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
        0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
        0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
        0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
        0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
        0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
        0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
        0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
        0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
        0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
        0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
        0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
        0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
        0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
        0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ],
};
|
||||||
|
|
||||||
|
// phf-based forbidden-word set, kept for benchmarking against FwTab.
// Must mirror FORBIDDEN_WORDS exactly. (Per the note at the lookup call
// site, phf measured noticeably slower than the hand-rolled FwTab.)
static FW_PHF: phf::Set<&'static [u8]> = phf::phf_set! {
    b"recovery",
    b"techie",
    b"http",
    b"https",
    b"digital",
    b"hack",
    b"::",
    b"//",
    b"com",
    b"@",
    b"crypto",
    b"bitcoin",
    b"wallet",
    b"hacker",
    b"welcome",
    b"whatsapp",
    b"email",
    b"cryptocurrency",
    b"stolen",
    b"freeze",
    b"quick",
    b"crucial",
    b"tracing",
    b"scammers",
    b"expers",
    b"hire",
    b"century",
    b"transaction",
    b"essential",
    b"managing",
    b"contact",
    b"contacting",
    b"understanding",
    b"assets",
    b"funds",
};
|
||||||
|
|
||||||
|
//static FW_TAB_DIR: [u8; 256] = [
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
|
||||||
|
// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
//];
|
||||||
|
//static FW_TAB_STRS: [u8; 244] = [
|
||||||
|
// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
|
||||||
|
// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
|
||||||
|
// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
|
||||||
|
// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
|
||||||
|
// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
|
||||||
|
// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
|
||||||
|
// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
|
||||||
|
// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
|
||||||
|
// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
|
||||||
|
// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
|
||||||
|
// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
|
||||||
|
// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
|
||||||
|
// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
|
||||||
|
// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
|
||||||
|
// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
|
||||||
|
// 0x61, 0x70, 0x70, 0x00,
|
||||||
|
//];
|
||||||
925
12bitfloat_rust/risspam/src/main_slow_monoio.rs
Normal file
925
12bitfloat_rust/risspam/src/main_slow_monoio.rs
Normal file
@ -0,0 +1,925 @@
|
|||||||
|
#![feature(likely_unlikely)]
|
||||||
|
|
||||||
|
mod books;
|
||||||
|
|
||||||
|
use crate::books::FULL_BOOK_PATHS;
|
||||||
|
use core_affinity::CoreId;
|
||||||
|
use libc::{aio_read, aiocb};
|
||||||
|
use memmap2::Mmap;
|
||||||
|
use rayon::prelude::*;
|
||||||
|
use std::cell::OnceCell;
|
||||||
|
use std::cell::RefCell;
|
||||||
|
use std::ffi::{OsStr, OsString};
|
||||||
|
use std::fs::File;
|
||||||
|
use std::io::Read;
|
||||||
|
use std::mem::MaybeUninit;
|
||||||
|
use std::ops::Deref;
|
||||||
|
use std::os::linux::raw::stat;
|
||||||
|
use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering};
|
||||||
|
use std::sync::{Arc, Mutex};
|
||||||
|
use std::thread::available_parallelism;
|
||||||
|
use std::time::{Duration, Instant};
|
||||||
|
use std::{array, env, fs, hint, mem, process, thread};
|
||||||
|
use monoio::IoUringDriver;
|
||||||
|
|
||||||
|
/// True for the five ASCII whitespace bytes: tab, LF, form feed, CR, space.
#[inline]
fn is_ascii_whitespace(b: u8) -> bool {
    match b {
        b'\t' | b'\n' | b'\x0C' | b'\r' | b' ' => true,
        _ => false,
    }
}
|
||||||
|
|
||||||
|
/// True for ASCII uppercase letters `'A'..='Z'`.
#[inline]
fn is_ascii_upper(b: u8) -> bool {
    (b'A'..=b'Z').contains(&b)
}
|
||||||
|
|
||||||
|
/// True for ASCII digits `'0'..='9'`.
#[inline]
fn is_ascii_digit(b: u8) -> bool {
    (b'0'..=b'9').contains(&b)
}
|
||||||
|
|
||||||
|
/// Per-file counters filled in by `analyze`.
///
/// 128-byte aligned so adjacent slots of a `Vec<Stats>` written by different
/// worker threads land on separate cache lines (avoids false sharing).
#[repr(align(128))]
#[derive(Copy, Clone)]
struct Stats {
    pub sentences: u32,    // count of '.' bytes seen
    pub words: u32,        // whitespace-separated tokens
    pub capitalizeds: u32, // words containing no non-uppercase byte
    pub numbers: u32,      // count of ASCII digit bytes
    pub forbiddens: u32,   // words matching the forbidden-word table
}
|
||||||
|
|
||||||
|
// Wall time spent reading files, accumulated across worker threads for the
// "[PROFILE] total file reading" report.
static TIME_SPENT_READING_FILES: Mutex<Duration> = Mutex::new(Duration::from_secs(0));

// Size of the per-thread scratch read buffer (6 MiB). Every input file must
// fit in this buffer.
const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024;
thread_local! {
    // One reusable WorkState per worker thread, so the big read buffer is
    // allocated once per thread instead of once per file.
    static WORK_STATE: RefCell<WorkState> = RefCell::new(WorkState::new());
}
|
||||||
|
|
||||||
|
/// Thread-local scratch state reused across `work` calls to avoid
/// reallocating the read buffer for every file.
pub struct WorkState {
    pub work_mem: Box<[u8]>,  // file contents are read into this buffer
    pub empty_vec: Box<[u8]>, // NOTE(review): appears unused in this file — verify before removing
    // pub io_mem: Box<[u8]>,
    // pub curr_read: Option<aiocb>,
    // pub had_first_load: bool,
}
|
||||||
|
|
||||||
|
impl WorkState {
|
||||||
|
pub fn new() -> Self {
|
||||||
|
Self {
|
||||||
|
work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
|
||||||
|
empty_vec: vec![].into_boxed_slice(),
|
||||||
|
// io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(),
|
||||||
|
// curr_read: None,
|
||||||
|
// had_first_load: false,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fn work(file_path: &OsStr, stats: &mut Stats) {
|
||||||
|
WORK_STATE.with_borrow_mut(|state: &mut WorkState| {
|
||||||
|
// // Load file
|
||||||
|
// let start_time = Instant::now();
|
||||||
|
|
||||||
|
// let Ok(text) = fs::read(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// process::abort();
|
||||||
|
// };
|
||||||
|
|
||||||
|
let mut file = File::open(file_path).unwrap();
|
||||||
|
let file_len = file.metadata().unwrap().len() as usize;
|
||||||
|
file.read_exact(&mut state.work_mem[..file_len]).unwrap();
|
||||||
|
let text = &state.work_mem[..file_len];
|
||||||
|
|
||||||
|
unsafe {
|
||||||
|
let mut cb = mem::zeroed();
|
||||||
|
|
||||||
|
aio_read(&raw mut cb);
|
||||||
|
}
|
||||||
|
|
||||||
|
// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice();
|
||||||
|
|
||||||
|
// let time_reading = start_time.elapsed();
|
||||||
|
// {
|
||||||
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||||
|
// *guard += time_reading;
|
||||||
|
// }
|
||||||
|
|
||||||
|
analyze(&text, stats);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
fn analyze(text: &[u8], stats: &mut Stats) {
|
||||||
|
// // NOTE: mmap is quite a bit slower
|
||||||
|
// // Load file
|
||||||
|
// let Ok(file) = File::open(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// std::process::abort();
|
||||||
|
// };
|
||||||
|
// let mmap = unsafe {
|
||||||
|
// Mmap::map(&file).unwrap()
|
||||||
|
// };
|
||||||
|
// mem::forget(file);
|
||||||
|
// let text = &*mmap;
|
||||||
|
|
||||||
|
// // Load file
|
||||||
|
// let start_time = Instant::now();
|
||||||
|
// let Ok(text) = fs::read(file_path) else {
|
||||||
|
// eprintln!("invalid file!");
|
||||||
|
// process::abort();
|
||||||
|
// };
|
||||||
|
// let time_reading = start_time.elapsed();
|
||||||
|
// {
|
||||||
|
// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap();
|
||||||
|
// *guard += time_reading;
|
||||||
|
// }
|
||||||
|
|
||||||
|
let mut sentences = 0;
|
||||||
|
let mut words = 0;
|
||||||
|
let mut capitalizeds = 0;
|
||||||
|
let mut numbers = 0;
|
||||||
|
let mut forbiddens = 0;
|
||||||
|
|
||||||
|
let mut idx = 0;
|
||||||
|
'full_loop: loop {
|
||||||
|
// TODO: Necessary for now
|
||||||
|
if idx >= text.len() {
|
||||||
|
break 'full_loop;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip whitespace
|
||||||
|
while is_ascii_whitespace(text[idx]) {
|
||||||
|
idx += 1;
|
||||||
|
if idx >= text.len() {
|
||||||
|
break 'full_loop;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find end of word
|
||||||
|
let word_start = idx;
|
||||||
|
let mut has_non_upper = false;
|
||||||
|
|
||||||
|
'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) {
|
||||||
|
idx += 1;
|
||||||
|
if idx >= text.len() {
|
||||||
|
break 'find_word_end;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Per-char logic
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
if !is_ascii_upper(b) {
|
||||||
|
has_non_upper = true;
|
||||||
|
}
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let word = &text[word_start..idx];
|
||||||
|
|
||||||
|
// dbg!(str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
if !has_non_upper {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check forbidden
|
||||||
|
if unsafe { FW_TAB.lookup(word) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
||||||
|
if token.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
// Sentence count, folded into this loop
|
||||||
|
// instead of another loop (better cache usage)
|
||||||
|
for &b in token {
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if upper
|
||||||
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
for &b in token {
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if words
|
||||||
|
// if FORBIDDEN_WORDS.contains(&token) {
|
||||||
|
// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
||||||
|
if unsafe { FW_TAB.lookup(token) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
// NOTE: This is pretty slow:
|
||||||
|
let mut idx = 0;
|
||||||
|
let mut word_start = 0;
|
||||||
|
let mut is_in_word = false;
|
||||||
|
let mut has_non_upper = false;
|
||||||
|
loop {
|
||||||
|
let b = unsafe { *text.get_unchecked(idx) };
|
||||||
|
|
||||||
|
let mut process_word = false;
|
||||||
|
if is_ascii_whitespace(b) {
|
||||||
|
if is_in_word {
|
||||||
|
process_word = true;
|
||||||
|
|
||||||
|
// Reset state for next word
|
||||||
|
is_in_word = false;
|
||||||
|
has_non_upper = false;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if !is_in_word {
|
||||||
|
word_start = idx;
|
||||||
|
is_in_word = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
has_non_upper |= !is_ascii_upper(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
// Check sentences
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let word = &text[word_start..idx];
|
||||||
|
|
||||||
|
idx += 1;
|
||||||
|
|
||||||
|
if process_word || idx >= text.len() {
|
||||||
|
words += 1;
|
||||||
|
if !has_non_upper {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// // DEBUG:
|
||||||
|
// println!("'{}'", str::from_utf8(word).unwrap());
|
||||||
|
|
||||||
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx >= text.len() {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
stats.sentences = sentences;
|
||||||
|
stats.words = words;
|
||||||
|
stats.capitalizeds = capitalizeds;
|
||||||
|
stats.numbers = numbers;
|
||||||
|
stats.forbiddens = forbiddens;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
fn analyze_old(file_path: &OsStr, stats: &mut Stats) {
|
||||||
|
// Load file
|
||||||
|
let Ok(text) = fs::read(file_path) else {
|
||||||
|
eprintln!("invalid file!");
|
||||||
|
std::process::abort();
|
||||||
|
};
|
||||||
|
|
||||||
|
let mut sentences = 0;
|
||||||
|
let mut words = 0;
|
||||||
|
let mut capitalizeds = 0;
|
||||||
|
let mut numbers = 0;
|
||||||
|
let mut forbiddens = 0;
|
||||||
|
|
||||||
|
for token in text.split(|&b| is_ascii_whitespace(b)) {
|
||||||
|
if token.is_empty() {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
words += 1;
|
||||||
|
|
||||||
|
// Sentence count, folded into this loop
|
||||||
|
// instead of another loop (better cache usage)
|
||||||
|
for &b in token {
|
||||||
|
if b == b'.' {
|
||||||
|
sentences += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if upper
|
||||||
|
if token.iter().all(|&b| is_ascii_upper(b)) {
|
||||||
|
capitalizeds += 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check digits
|
||||||
|
for &b in token {
|
||||||
|
if is_ascii_digit(b) {
|
||||||
|
numbers += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if words
|
||||||
|
// if FORBIDDEN_WORDS.contains(&token) {
|
||||||
|
if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } {
|
||||||
|
forbiddens += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
stats.sentences = sentences;
|
||||||
|
stats.words = words;
|
||||||
|
stats.capitalizeds = capitalizeds;
|
||||||
|
stats.numbers = numbers;
|
||||||
|
stats.forbiddens = forbiddens;
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
// Read in files from args
|
||||||
|
let mut files = Vec::with_capacity(env::args().len());
|
||||||
|
let mut do_parallel = false;
|
||||||
|
|
||||||
|
let start_time = Instant::now();
|
||||||
|
for arg in env::args_os().skip(1) {
|
||||||
|
// skip program arg
|
||||||
|
if arg == "-p" {
|
||||||
|
do_parallel = true;
|
||||||
|
} else {
|
||||||
|
files.push(arg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
println!("[PROFILE] taking args took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
// env::args_os().
|
||||||
|
|
||||||
|
// let files = FULL_BOOK_PATHS;
|
||||||
|
|
||||||
|
// // Build table
|
||||||
|
// let tab = FwTab::build();
|
||||||
|
// tab.compile();
|
||||||
|
|
||||||
|
// Do the work
|
||||||
|
let mut stats = vec![Stats {
|
||||||
|
sentences: 0,
|
||||||
|
words: 0,
|
||||||
|
capitalizeds: 0,
|
||||||
|
numbers: 0,
|
||||||
|
forbiddens: 0,
|
||||||
|
}; files.len()];
|
||||||
|
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let num_cores = available_parallelism().unwrap().get();
|
||||||
|
let num_threads = num_cores * 1;
|
||||||
|
|
||||||
|
// DEBUG:
|
||||||
|
dbg!(num_threads);
|
||||||
|
dbg!(num_cores);
|
||||||
|
|
||||||
|
let next_file_idx = &*Box::leak(Box::new(AtomicUsize::new(0)));
|
||||||
|
|
||||||
|
thread::scope(|scope| {
|
||||||
|
for thread_idx in 0..num_threads {
|
||||||
|
// Set thread affinity
|
||||||
|
assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
|
||||||
|
|
||||||
|
let cap_next_file_idx = &next_file_idx;
|
||||||
|
let cap_stats_ptr = stats.as_ptr() as usize;
|
||||||
|
let cap_files = &files;
|
||||||
|
thread::Builder::new().spawn_scoped(scope, move || {
|
||||||
|
let files = cap_files;
|
||||||
|
|
||||||
|
// let exec = glommio::LocalExecutorBuilder::new(Placement::Unbound).make().unwrap();
|
||||||
|
// exec.run(async {
|
||||||
|
// println!("Running in glommio thread {core_idx}");
|
||||||
|
// });
|
||||||
|
|
||||||
|
let mut rt = monoio::RuntimeBuilder::<IoUringDriver>::new()
|
||||||
|
.build()
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let mut work_mem = vec![0; TEMP_MEM_SIZE].into_boxed_slice();
|
||||||
|
let files_per_thread = files.len() / num_threads;
|
||||||
|
|
||||||
|
rt.block_on(async {
|
||||||
|
// // Claim next file id
|
||||||
|
// loop {
|
||||||
|
// let work_idx = cap_next_file_idx.fetch_add(1, Ordering::Relaxed);
|
||||||
|
// if work_idx >= files.len() {
|
||||||
|
// return;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Do work
|
||||||
|
let thread_start = thread_idx * files_per_thread;
|
||||||
|
for i in 0..files_per_thread {
|
||||||
|
let work_idx = thread_start + i;
|
||||||
|
|
||||||
|
let path = &files[work_idx];
|
||||||
|
let stat = unsafe {
|
||||||
|
&mut *(cap_stats_ptr as *mut Stats)
|
||||||
|
.offset(work_idx as isize)
|
||||||
|
};
|
||||||
|
|
||||||
|
let file = monoio::fs::File::open(path)
|
||||||
|
.await
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
struct CappedReadBuf(pub Box<[u8]>, usize);
|
||||||
|
unsafe impl monoio::buf::IoBufMut for CappedReadBuf {
|
||||||
|
fn write_ptr(&mut self) -> *mut u8 {
|
||||||
|
monoio::buf::IoBufMut::write_ptr(&mut self.0)
|
||||||
|
}
|
||||||
|
|
||||||
|
fn bytes_total(&mut self) -> usize {
|
||||||
|
self.1
|
||||||
|
}
|
||||||
|
|
||||||
|
unsafe fn set_init(&mut self, pos: usize) {
|
||||||
|
monoio::buf::IoBufMut::set_init(&mut self.0, pos)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let meta = file.metadata().await.unwrap();
|
||||||
|
let io_mem = mem::take(&mut work_mem);
|
||||||
|
let (res, buf) = file.read_exact_at(CappedReadBuf(io_mem, meta.len() as usize), 0).await;
|
||||||
|
res.unwrap();
|
||||||
|
work_mem = buf.0;
|
||||||
|
|
||||||
|
analyze(&work_mem, stat);
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
// exec.run(async {
|
||||||
|
// // Claim next file id
|
||||||
|
// let work_idx = cap_next_file_idx.fetch_add(1, Ordering::Relaxed);
|
||||||
|
//
|
||||||
|
// let path = &files[work_idx];
|
||||||
|
// let stat = unsafe {
|
||||||
|
// &mut *(stats_ptr as *mut Stats)
|
||||||
|
// .offset(work_idx as isize)
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// work(path, stat);
|
||||||
|
// });
|
||||||
|
}).unwrap();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// rayon::ThreadPoolBuilder::new()
|
||||||
|
// .num_threads(num_threads)
|
||||||
|
// .build_global()
|
||||||
|
// .unwrap();
|
||||||
|
//
|
||||||
|
// files.par_iter()
|
||||||
|
// .enumerate()
|
||||||
|
// .for_each(|(idx, p)| {
|
||||||
|
// let s = unsafe {
|
||||||
|
// &mut *stats.as_ptr()
|
||||||
|
// .offset(idx as isize)
|
||||||
|
// .cast_mut()
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
//// let mut path = OsString::from("../../");
|
||||||
|
//// path.push(p);
|
||||||
|
// let path = p;
|
||||||
|
// work(path, s);
|
||||||
|
// });
|
||||||
|
|
||||||
|
// thread::scope(|scope| {
|
||||||
|
// let files_per_thread = files.len() / num_threads;
|
||||||
|
//
|
||||||
|
// for thread_idx in 0..num_threads {
|
||||||
|
// let capture_files = &files;
|
||||||
|
// let capture_stats = &stats;
|
||||||
|
// thread::Builder::new().spawn_scoped(scope, move || {
|
||||||
|
// let files = capture_files;
|
||||||
|
// let stats = capture_stats;
|
||||||
|
//
|
||||||
|
// // Set thread affinity
|
||||||
|
// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores }));
|
||||||
|
//
|
||||||
|
// // Do work
|
||||||
|
// let thread_start = thread_idx * files_per_thread;
|
||||||
|
// for i in 0..files_per_thread {
|
||||||
|
// let real_idx = thread_start + i;
|
||||||
|
// let file_path = &files[real_idx];
|
||||||
|
// let st = unsafe {
|
||||||
|
// &mut *stats.as_ptr()
|
||||||
|
// .offset(real_idx as isize)
|
||||||
|
// .cast_mut()
|
||||||
|
// };
|
||||||
|
//
|
||||||
|
// work(&file_path, st);
|
||||||
|
// }
|
||||||
|
// }).unwrap();
|
||||||
|
// }
|
||||||
|
// });
|
||||||
|
|
||||||
|
println!("[PROFILE] processing text took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
// Accumulate stats
|
||||||
|
let start_time = Instant::now();
|
||||||
|
|
||||||
|
let mut total_words = 0;
|
||||||
|
let mut total_capitalizeds = 0;
|
||||||
|
let mut total_sentences = 0;
|
||||||
|
let mut total_numbers = 0;
|
||||||
|
let mut total_forbiddens = 0;
|
||||||
|
|
||||||
|
for stat in &stats {
|
||||||
|
total_words += stat.words;
|
||||||
|
total_capitalizeds += stat.capitalizeds;
|
||||||
|
total_sentences += stat.sentences;
|
||||||
|
total_numbers += stat.numbers;
|
||||||
|
total_forbiddens += stat.forbiddens;
|
||||||
|
}
|
||||||
|
|
||||||
|
let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0;
|
||||||
|
let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0;
|
||||||
|
let word_count_per_sentence = total_words as f32 / total_sentences as f32;
|
||||||
|
|
||||||
|
println!();
|
||||||
|
println!("Total Words: {total_words}");
|
||||||
|
println!("Total Capitalized words: {total_capitalizeds}");
|
||||||
|
println!("Total Sentences: {total_sentences}");
|
||||||
|
println!("Total Numbers: {total_numbers}");
|
||||||
|
println!("Total Forbidden words: {total_forbiddens}");
|
||||||
|
println!("Capitalized percentage: {capitalized_percentage:.6}");
|
||||||
|
println!("Forbidden percentage: {forbidden_percentage:.6}");
|
||||||
|
println!("Word count per sentence: {word_count_per_sentence:.6}");
|
||||||
|
println!("Total files read: {}", files.len());
|
||||||
|
|
||||||
|
println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed());
|
||||||
|
|
||||||
|
println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap());
|
||||||
|
|
||||||
|
// Exit process to avoid running drops
|
||||||
|
process::exit(0);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compact lookup table for the forbidden-word list, keyed on a word's first
// byte. Built at dev time by `FwTab::build` and dumped as Rust source by
// `FwTab::compile` (see the generated `FW_TAB` static below).
#[repr(C)]
struct FwTab {
    // pub dir_and_len_bits: [u32; 256],
    // Per first byte: bit L set iff some forbidden word starting with that
    // byte has length L — cheap length-based rejection in `lookup`.
    pub dir_len_bits: [u16; 256],
    // Per first byte: offset into `strs` where that byte's sorted word
    // sublist starts; 0 means "no words start with this byte".
    pub dir: [u8; 256],
    // Packed sublists: [len byte][remaining len-1 bytes of the word]...,
    // each sublist terminated by a 0 length byte.
    pub strs: [u8; 256],
}
|
||||||
|
|
||||||
|
impl FwTab {
    // Builds the lookup table from FORBIDDEN_WORDS at runtime.
    //
    // Per first byte, the words are sorted and packed into `strs` as
    // [total length][bytes after the first], with a 0 terminator per
    // sublist; `dir` records each sublist's start offset, `dir_len_bits`
    // records which word lengths occur. Panics (via unwrap/assert) if the
    // packed strings exceed 256 bytes or an offset doesn't fit in u8.
    // NOTE(review): `0x1 << fw.len()` on a u16 would overflow for words of
    // length >= 16 — all current entries are shorter; verify when adding words.
    pub fn build() -> Self {
        // Sort fws by first char
        let mut sorted_fws: Vec<Vec<&'static [u8]>> = vec![vec![]; 256];

        for word in FORBIDDEN_WORDS {
            sorted_fws[word[0] as usize].push(&word);
        }
        for i in 0..256 {
            sorted_fws[i].sort()
        }

        // // DEBUG:
        // println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());

        // Build str tab
        let mut fw_dir = [0u8; 256];
        // let mut fw_dir_len_bits = [0u32; 256];
        let mut fw_dir_len_bits = [0u16; 256];
        let mut fw_strs: Vec<u8> = vec![];

        fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries

        for c in 0..256 {
            // Record which word lengths occur for this first byte.
            for fw in FORBIDDEN_WORDS {
                if c == fw[0] as usize {
                    fw_dir_len_bits[c] |= 0x1 << fw.len();
                }
            }

            if !sorted_fws[c].is_empty() {
                let sublist_start_offset = fw_strs.len().try_into().unwrap();
                fw_dir[c] = sublist_start_offset;

                // DEBUG:
                println!("{c} start offset: {}", sublist_start_offset);
                println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::<Vec<_>>());

                // Push strings: total length byte, then the word minus its
                // first byte (implied by the directory slot).
                for fw in &sorted_fws[c] {
                    fw_strs.push(fw.len().try_into().unwrap());
                    for &c in &fw[1..] {
                        fw_strs.push(c);
                    }
                }

                // Mark end of per-char word sublist
                fw_strs.push(b'\0');
            }
        }

        // DEBUG:
        println!("strs len: {}", fw_strs.len());

        assert_eq!(fw_dir.len(), 256);
        assert!(fw_strs.len() <= 256);

        fw_strs.resize(256, 0);

        let tab = FwTab {
            dir: fw_dir,
            dir_len_bits: fw_dir_len_bits,
            // dir_and_len_bits: array::from_fn(|idx| {
            //     (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24)
            // }),
            strs: fw_strs.try_into().unwrap(),
        };

        // DEBUG: Test some strings
        unsafe {
            dbg!(tab.lookup(b"cpm"));
            dbg!(tab.lookup(b"com"));
            dbg!(tab.lookup(b"coma"));
            dbg!(tab.lookup(b"co"));
            dbg!(tab.lookup(b"cam"));
            dbg!(tab.lookup(b"crypto"));
            dbg!(tab.lookup(b"@"));
            dbg!(tab.lookup(b""));
            dbg!(tab.lookup(b" "));
            dbg!(tab.lookup(b"test"));
            dbg!(tab.lookup(b"expers"));
        }

        tab
    }

    // Dumps this table as a `static FW_TAB: FwTab = ...;` Rust source
    // snippet on stdout, to be pasted into the file (see FW_TAB below).
    pub fn compile(&self) {
        println!("static FW_TAB: FwTab = FwTab {{");

        // println!("\tdir_and_len_bits: [");
        // for chunk in self.dir_and_len_bits.chunks(16) {
        //     print!("\t\t");
        //     for &b in chunk {
        //         print!("0x{b:08x}, ");
        //     }
        //     println!();
        // }
        // println!("\t],");

        println!("\tdir: [");
        for chunk in self.dir.chunks(16) {
            print!("\t\t");
            for &b in chunk {
                print!("0x{b:02x}, ");
            }
            println!();
        }
        println!("\t],");

        println!("\tdir_len_bits: [");
        for chunk in self.dir_len_bits.chunks(16) {
            print!("\t\t");
            for &b in chunk {
                print!("0x{b:04x}, ");
            }
            println!();
        }
        println!("\t],");

        println!("\tstrs: [");
        for chunk in self.strs.chunks(16) {
            print!("\t\t");
            for &b in chunk {
                print!("0x{b:02x}, ");
            }
            println!();
        }
        println!("\t],");

        println!("}};");
    }

    // Returns true if `word` is in the forbidden-word table.
    //
    // Steps: reject by first byte's length bitmask, then linearly scan that
    // byte's sorted sublist, comparing only same-length entries and bailing
    // early once an entry compares greater than `word`.
    //
    // # Safety
    // Relies on the table being self-consistent (as produced by `build` /
    // `compile`): every `dir` offset points at a valid length byte in
    // `strs` and every sublist is 0-terminated within the array; otherwise
    // the unchecked indexing reads out of bounds.
    #[inline]
    pub unsafe fn lookup(&self, word: &[u8]) -> bool {
        let &[first_char, ..] = word else {
            return false;
        };

        // let dir_and_len_bits = unsafe {
        //     *self.dir_and_len_bits.get_unchecked(first_char as usize)
        // };
        // if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 {
        //     return false;
        // }

        // Quick reject: no forbidden word with this first byte has this
        // length (only conclusive for lengths the u16 mask can represent).
        let len_bits = unsafe {
            *self.dir_len_bits.get_unchecked(first_char as usize)
        };
        if word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0 {
            return false;
        }

        // let mut str_offset = (dir_and_len_bits >> 24) as usize;
        let mut str_offset = unsafe {
            *self.dir.get_unchecked(first_char as usize) as usize
        };

        // Char doesn't have any strings in the table
        if str_offset == 0 {
            return false;
        }

        // Iterate over strs
        loop {
            // let fw_len = u16::from_le_bytes([
            //     self.strs[str_offset],
            //     self.strs[str_offset+1]
            // ]);
            let fw_len: u8 = unsafe {
                *self.strs.get_unchecked(str_offset)
            };

            if fw_len == 0 {
                // We've reached the end of the word sublist
                return false;
            }

            // Only compare words if they are the same length
            if word.len() == fw_len as usize {
                // Compare strs (byte 0 is implied by the directory slot,
                // so start at offset 1)
                let mut char_offset = 1usize;
                loop {
                    // Found the word!
                    if char_offset == word.len() {
                        return true;
                    }

                    let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) };
                    let word_char = unsafe { *word.get_unchecked(char_offset) };

                    if fw_char > word_char {
                        // Word can't possible be in the sorted list, return
                        return false;
                    }
                    if fw_char < word_char {
                        // Try next word
                        break;
                    }

                    char_offset += 1;
                }
            }

            // Advance to next word
            // let str_len_bytes = 2;
            let str_len_bytes = 1;
            str_offset += (fw_len as usize - 1) + str_len_bytes;
        }
    }
}
|
||||||
|
|
||||||
|
// Spam-indicator word list (ASCII byte strings). Order is irrelevant —
// `FwTab::build` re-buckets and sorts per first byte.
// NOTE(review): every entry must be shorter than 16 bytes to fit the u16
// `dir_len_bits` masks; "cryptocurrency" (14) is currently the longest.
const FORBIDDEN_WORDS: [&'static [u8]; 35] = [
    b"recovery",
    b"techie",
    b"http",
    b"https",
    b"digital",
    b"hack",
    b"::",
    b"//",
    b"com",
    b"@",
    b"crypto",
    b"bitcoin",
    b"wallet",
    b"hacker",
    b"welcome",
    b"whatsapp",
    b"email",
    b"cryptocurrency",
    b"stolen",
    b"freeze",
    b"quick",
    b"crucial",
    b"tracing",
    b"scammers",
    b"expers",
    b"hire",
    b"century",
    b"transaction",
    b"essential",
    b"managing",
    b"contact",
    b"contacting",
    b"understanding",
    b"assets",
    b"funds",
];
|
||||||
|
|
||||||
|
// Precomputed lookup table for FORBIDDEN_WORDS, generated by
// `FwTab::build()` + `FwTab::compile()` (paste of its stdout).
// Do not edit by hand — regenerate after changing FORBIDDEN_WORDS.
static FW_TAB: FwTab = FwTab {
    dir: [
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
        0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ],
    dir_len_bits: [
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000,
        0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
        0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
    ],
    strs: [
        0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
        0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
        0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
        0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
        0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
        0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
        0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
        0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
        0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
        0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
        0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
        0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
        0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
        0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
        0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
        0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
    ],
};
|
||||||
|
|
||||||
|
//static FW_TAB_DIR: [u8; 256] = [
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00,
|
||||||
|
// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||||
|
//];
|
||||||
|
//static FW_TAB_STRS: [u8; 244] = [
|
||||||
|
// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00,
|
||||||
|
// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03,
|
||||||
|
// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74,
|
||||||
|
// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f,
|
||||||
|
// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07,
|
||||||
|
// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65,
|
||||||
|
// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65,
|
||||||
|
// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b,
|
||||||
|
// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00,
|
||||||
|
// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08,
|
||||||
|
// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73,
|
||||||
|
// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61,
|
||||||
|
// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00,
|
||||||
|
// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61,
|
||||||
|
// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73,
|
||||||
|
// 0x61, 0x70, 0x70, 0x00,
|
||||||
|
//];
|
||||||
2
Makefile
2
Makefile
@ -32,6 +32,8 @@ build_all: build build_py build_cpp build_borded_cpp build_risspam build_jest
|
|||||||
run: run_spam wl run_not_spam
|
run: run_spam wl run_not_spam
|
||||||
run_risspam: run_spam_risspam run_not_spam_risspam
|
run_risspam: run_spam_risspam run_not_spam_risspam
|
||||||
|
|
||||||
|
bench_rust: build_risspam benchmark_only
|
||||||
|
|
||||||
format:
|
format:
|
||||||
clang-format *.c *.h -i
|
clang-format *.c *.h -i
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user