diff --git a/.gitignore b/.gitignore index 206928b..4efded9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,14 +1,14 @@ .r_history .history -.vscode -publish -books -__pycache__ -target +.vscode/ +publish/ +books/ +__pycache__/ +target/ ./isspam.py -isspam -risspam +/isspam +/risspam /jisspam -isspam_cpp +/isspam_cpp +/borded_cpp_exec .build-trigger-2014-12-02 15:26 -borded_cpp_exec diff --git a/12bitfloat_rust/risspam/.cargo/config.toml b/12bitfloat_rust/risspam/.cargo/config.toml new file mode 100644 index 0000000..df7011c --- /dev/null +++ b/12bitfloat_rust/risspam/.cargo/config.toml @@ -0,0 +1,5 @@ +[build] +rustflags = [ + "-Ztls-model=initial-exec", + "-Ctarget-cpu=native" +] diff --git a/12bitfloat_rust/risspam/src/books.rs b/12bitfloat_rust/risspam/src/books.rs new file mode 100644 index 0000000..6dc756f --- /dev/null +++ b/12bitfloat_rust/risspam/src/books.rs @@ -0,0 +1,907 @@ + +pub static FULL_BOOK_PATHS: &[&'static str] = &[ + "books/0192806807.pdf - Unknown.txt", + "books/0_Deep Learning Cookbook - Practical Recipes to Get Started Quickly.txt", + "books/0_Deep Learning for Search.txt", + "books/0_Deep Learning with Python.txt", + "books/10Algorithms-08.txt", + "books/1407.7502v3.txt", + "books/1491912766_Advanced.txt", + "books/18374.txt", + "books/2014-data-science-salary-survey.txt", + "books/21 Recipes for Mining Twitter.txt", + "books/240415.txt", + "books/3dprinting.txt", + "books/9780077418182.pdf - W. 
Schiff.txt", + "books/A Developer’s Guide to the Semantic Web.txt", + "books/Advanced Analytics with Spark - Patterns for Learning from Data at Scale.txt", + "books/Advanced Analytics with Spark - Sandy Ryza, Uri Laserson, Sean Owen.txt", + "books/AdvancedBashScripting.txt", + "books/advanced-microservices.txt", + "books/Advanced Techniques in Web Intelligence – Part II.txt", + "books/Advanced Techniques in Web Intelligence – Part I.txt", + "books/Agile Business Intelligence.txt", + "books/Agile Data Science.txt", + "books/Agile Estimating and Planning.txt", + "books/Agile for Everybody - Creating Fast, Flexible, and Customer First Organizations.txt", + "books/Agile Methods - Large-Scale Development, Refactoring, Testing, and Estimation.txt", + "books/Agile Retrospectives - Making Good Teams Great.txt", + "books/Agile_Software_Development.txt", + "books/Agile Testing - A Practical Guide for Testers and Agile Teams.txt", + "books/Algorithmic Graph Theory and Sage.txt", + "books/Algorithms for Interviews.txt", + "books/algoritmos-programacion-Python.txt", + "books/a-little-book-of-r-for-time-series.txt", + "books/Amazon Web Services in Action.txt", + "books/[Andreas_M._Antonopoulos]_Mastering_Bitcoin_Unloc(BookZZ.org).txt", + "books/android9developmentcookbook.txt", + "books/AndroidForensics.txt", + "books/androidprogrammingforbeginners.txt", + "books/AndroidProgrammingPushingTheLimits.txt", + "books/AndroidSensorProgramming.txt", + "books/AndroidUIDesign.txt", + "books/AngualrJS Fundamentals.txt", + "books/angular-2-test-driven-development-2nd.txt", + "books/Angular2.txt", + "books/angular6forenterprise-readywebapplications.txt", + "books/Angular in Action.txt", + "books/AngularJS by Example.txt", + "books/AngularJS by Example - Unknown.txt", + "books/AngularJsNoviceToNinja.txt", + "books/AngularJS.txt", + "books/Angular.txt", + "books/angular_upandrunning.txt", + "books/AnIntroductionToGCC.txt", + "books/AnIntroductionToGNUMakeTool.txt", + "books/An Introduction to 
Information Retreival.txt", + "books/An Introduction to Machine Learning Interpretability.txt", + "books/antitextbookGo.txt", + "books/Anything You Want - 40 Lessons for a New Kind of Entrepreneur.txt", + "books/Apache Kafka Cookbook.txt", + "books/Apache Mesos Cookbook.txt", + "books/Apache Sqoop Cookbook.txt", + "books/ApacheTomcatCookbook.txt", + "books/API Design Cookbook.txt", + "books/api-driven-devops.txt", + "books/APIs A Strategy Guide.txt", + "books/Applied Text Analysis with Python - Enabling Language Aware Data Products with Machine Learning.txt", + "books/AprendiendoJavaScript(spanish).txt", + "books/Architecting Modern Data Platforms - A Guide To Enterprise Hadoop At Scale.txt", + "books/architectingmodernjavaeeapplications.txt", + "books/Arduino_Succinctly.txt", + "books/artificialintelligenceinthe21stcentury.txt", + "books/Art of Agile Development.txt", + "books/artofdatascience.txt", + "books/aspectos_avanzados_en_seguridad_en_redes_modulos.txt", + "books/aspnetcore2andangular5.txt", + "books/ASPNetCore.txt", + "books/aspnetmvc4_Succinctly.txt", + "books/ASP.NET_MVC_Succinctly.txt", + "books/Atomic Habits - An Easy & Proven Way to Build Good Habits & Break Bad Ones.txt", + "books/autocad2019beginningandintermediate.txt", + "books/autocad20203dmodeling.txt", + "books/autodeskrevit2020architecture.txt", + "books/Automate the Boring Stuff with Python.txt", + "books/A Workflow Approach to Stream Processing.txt", + "books/Bad Data Handbook - Cleaning Up The Data So You Can Get Back To Work.txt", + "books/bashcookbook.txt", + "books/Bash Guide for Beginners.txt", + "books/BasicsProgrammableLogicControllerPrinciples.txt", + "books/Bayesian_computation_with_R-libre.txt", + "books/Bayesian Networks and Influence Diagrams A Guide to Construction and Analysis.txt", + "books/Bayesian Reasoning and Machine Learning .txt", + "books/bdd-in-action.txt", + "books/BDD.txt", + "books/Beautiful Code.txt", + "books/Beautiful_Code.txt", + "books/Beautiful Data.txt", + 
"books/Beautiful Visualization.txt", + "books/become-ninja-angular2.txt", + "books/Beginning Amazon Web Services with Node.js.txt", + "books/BeginningAndroidGames.txt", + "books/BeginningJSON.txt", + "books/Big_Data_Analytics_with_R.txt", + "books/Big Data Analytics with Spark - A Practitioner's Guide to Using Spark for Large Scale Data Analysis.txt", + "books/Big Data, Data Mining and Machine Learning.txt", + "books/Big Data For Dummies.txt", + "books/Big Data Glossary.txt", + "books/Blockchain.txt", + "books/bookL.txt", + "books/book-no-solutions-aug-21-2014.txt", + "books/book.txt", + "books/BootstrapCookbook.txt", + "books/build-apis-you-wont-hate.txt", + "books/Building Adaptable Software with Microservices.txt", + "books/Building Evolutionary Architectures.txt", + "books/Building Hypermedia APIs with HTML5 and Node.txt", + "books/Building Hypermedia APIs with HTML5 and No - Mike Amundsen.txt", + "books/Building Machine Learning Projects with TensorFlow.txt", + "books/Building Machine Learning Systems with Python.txt", + "books/building-microservices-designing-fine-grained-systems.txt", + "books/Building Microservices.txt", + "books/buildingrestfulpythonwebservices - Unknown.txt", + "books/Building-web-apps-with-Node.js.txt", + "books/BuildingWebAppsWithNode.js.txt", + "books/Business Adventures - Twelve Classic Tales from the World of Wall Street.txt", + "books/Business Intelligence Data Mining and Optimization for Decision Making.txt", + "books/business-models-for-data-economy.txt", + "books/Can I Freeze It_ _ How to Use the Most Ver - Susie Theodorou.txt", + "books/Category Theory for Computer Science.txt", + "books/Category Theory for Computer Science - Unknown.txt", + "books/C++_CreatingGamesStepByStepGUIDE.txt", + "books/Chapter-13-Association-Rules.txt", + "books/Chapter-14-Cluster-Analysis.txt", + "books/Chapter-16-Regression-Based-Forecasting.txt", + "books/Chapter-1-Introduction.txt", + "books/Chapter-2-Overview-of-the-Data-Mining-Process.txt", + 
"books/Chapter-3-Data-Visualization.txt", + "books/Chapter 4_Dimension Reduction (Data Mining - Nitin R. Patel.txt", + "books/Chapter-4-Dimension-Reduction.txt", + "books/Chapter-5-Evaluating-Classification-and-Predictive-Performance.txt", + "books/Chapter-6-Multiple-Linear-Regression.txt", + "books/Chapter-7-k-Nearest-Neighbors--k-NN-.txt", + "books/Chapter-8-Naive-Bayes.txt", + "books/Chapter-9-Classification-and-Regression-Trees.txt", + "books/Christian Rudder-Dataclysm_ who we are (when we think no one's looking)-Crown (2014).txt", + "books/ciml-v0_9-all.txt", + "books/Classic Computer Science Problems in Python.txt", + "books/classicgamedesign.txt", + "books/classicshellscripting.txt", + "books/Clean Code - A Handbook of Agile Software Craftsmanship.txt", + "books/clean-coder-conduct-professional-programmers.txt", + "books/Clean Code.txt", + "books/cloudcomputingbasics_aselfteachingintroduction.txt", + "books/Cloud Native DevOps with Kubernetes.txt", + "books/Cloud Native Patterns - Designing change tolerant software.txt", + "books/Code Complete - A Practical Handbook of Software Construction.txt", + "books/Code Complete.txt", + "books/Collaborative filtering.txt", + "books/Collective Intelligence in Action.txt", + "books/Collective Intelligence.txt", + "books/Competing Against Luck - The Story of Innovation and Customer Choice.txt", + "books/Compilers-Principles-TechniquesAndTools2ndEdition.txt", + "books/Computational Intelligence.txt", + "books/Concurrency in Go_ Tools and Techniques fo - Katherine Cox-Buday.txt", + "books/Concurrency in Go - Tools and Techniques for Developers.txt", + "books/ConsumersurplusatUber_PR.txt", + "books/Contagious - Why Things Catch On.txt", + "books/Conversion_Optimization.txt", + "books/cover - Jolcia.txt", + "books/Cplusplus_Today.txt", + "books/cprogramming_aselfteachingintroduction.txt", + "books/C Programming - PhD Rajiv Chopra.txt", + "books/Create a Data Driven Organization.txt", + "books/Creating a Data-Driven 
Organization - Carl Anderson.txt", + "books/Crossing the Chasm - Marketing and Selling Disruptive Products to Mainstream Customers.txt", + "books/Crypto101.txt", + "books/CSharpProfesional.txt", + "books/C_Sharp_Succinctly.txt", + "books/CSS3_Succinctly.txt", + "books/CSS3.txt", + "books/CSS FlexBox.txt", + "books/CssGridLayout.txt", + "books/CSS in Depth.txt", + "books/CSSMaster2ndEdition.txt", + "books/csspocketreference.txt", + "books/CSS-Programming-Cookbook.txt", + "books/CSS_Secrets.txt", + "books/css_thedefinitiveguide.txt", + "books/CursoHTML5.txt", + "books/D3-Tips-and-Tricks.txt", + "books/Daily Rituals - How Great Minds Make Time, Find Inspiration, and Get to Work.txt", + "books/Dark Web Exploring and Data Mining the Dark Side of the Web.txt", + "books/Dart-A-Modern-Web-Language.txt", + "books/dart_in_action.txt", + "books/dart_programming_tutorial.txt", + "books/Data_Algorithms (1).txt", + "books/Data_Algorithms.txt", + "books/Data Analysis with Open Source Tools.txt", + "books/Data_Analytics_in_Sports.txt", + "books/Data_Analytics_with_Hadoop (1).txt", + "books/datacleaning_pocketprimer.txt", + "books/data-driven.txt", + "books/datalog2011-dedalus.txt", + "books/Data Mashups in R.txt", + "books/Data Mining and Statistics for Decision Making.txt", + "books/DataMining-ch1.txt", + "books/DataMining-ch2.txt", + "books/DataMining-ch3.txt", + "books/DataMining-ch4.txt", + "books/DataMining-ch4 - Unknown.txt", + "books/DataMining-ch5.txt", + "books/DataMining-ch6.txt", + "books/DataMining-ch7.txt", + "books/Datamining-ch8.txt", + "books/Data Mining Concepts and Techniques.txt", + "books/Data Mining - Concepts, Models, Methods, and Algorithms.txt", + "books/Data Mining Methods for Recommender Systems.txt", + "books/Data Mining - Practical Machine Learning Tools and Techniques.txt", + "books/datamining.txt", + "books/Data_Science_from_Scratch (1).txt", + "books/Data Science from Scratch - Joel Grus.txt", + "books/Data Science from Scratch.txt", + 
"books/Data_Science_from_Scratch.txt", + "books/Data Source Handbook.txt", + "books/Data Stream Mining - A Practical Approach.txt", + "books/Data Structures and Algorithms.txt", + "books/Data Structures and Algorithms with JavaScript.txt", + "books/datastyle.txt", + "books/Data Visualization with D3.js Cookbook.txt", + "books/datavisualizationwithpythonandjavascript.txt", + "books/Data_Visualization_with_Python_and_JavaScript.txt", + "books/data-wrangling-cheatsheet.txt", + "books/Data Wrangling with JavaScript.txt", + "books/Data_Wrangling_with_Python (1).txt", + "books/dbSecurityBook.txt", + "books/DE_0_PHYTON -.txt", + "books/Dealing with China - An Insider Unmasks the New Economic Superpower.txt", + "books/DebianHandBookSpanish.txt", + "books/Debugging Teams - Better Productivity through Collaboration.txt", + "books/Decision Support Systems For Business Intelligence.txt", + "books/Deep Work - Cal Newport.txt", + "books/Deep Work - Rules for Focused Success in a Distracted World.txt", + "books/Dependency Injection Principles, Practices, and Patterns.txt", + "books/Design Driven Testing.txt", + "books/Design for How People Think - Using Brain Science to Build Better Products.txt", + "books/Designing Data-Intensive Applications - The Big Ideas Behind Reliable, Scalable and Maintainable Systems.txt", + "books/Designing_Data_Intensive_Applications.txt", + "books/Designing Data-Intensive Web Applications.txt", + "books/Designing Data Visualizations.txt", + "books/Designing Interfaces - Patterns for Effective Interaction Design.txt", + "books/DesigningUXForms.txt", + "books/designingwebapis.txt", + "books/Designing with the Mind in Mind Simple Guide to Understanding User Interface Design Rules.txt", + "books/Designing with the Mind in Mind Simple Gui - Unknown.txt", + "books/DesignPatterns.txt", + "books/developer-testing.txt", + "books/Developing Large Web Applications.txt", + "books/developing-microservices-node-js.txt", + "books/Developing Microservices with 
Node.js.txt", + "books/devops-2-0-toolkit.txt", + "books/devops-2-1-toolkit-deploying-monitoring.txt", + "books/DevOps Automation Cookbook.txt", + "books/devops-web-development.txt", + "books/DiveIntoPython.txt", + "books/django2webdevelopmentcookbook.txt", + "books/docker-bootcamp.txt", + "books/DockerContainerizationCookbook.txt", + "books/docker-cookbook-solutions-examples.txt", + "books/docker-in-action.txt", + "books/Docker in Action.txt", + "books/docker-in-practice.txt", + "books/Docker in Practice.txt", + "books/docker-orchestration.txt", + "books/Docker_ Up and Running - Matthias, Karl.txt", + "books/Docker Up and Running.txt", + "books/Docker_Up_and_Running.txt", + "books/domain-driven-design-distilled.txt", + "books/DotNETCore.txt", + "books/ECMAScript_6.txt", + "books/effectiveawk.txt", + "books/Effective_DevOps.txt", + "books/EF JS sonsivri.txt", + "books/Elasticsearch Blueprints - A practical project-based guide to generating compelling search solutions using the dynamic and powerful features of Elasticsearch.txt", + "books/Elasticsearch Cookbook.txt", + "books/Elasticsearch in Action.txt", + "books/Elasticsearch Indexing - Improve search experiences with Elasticsearch's powerful indexing functionality.txt", + "books/Elasticsearch Server.txt", + "books/Elasticsearch - The Definitive Guide.txt", + "books/ElasticSearchTutorial.txt", + "books/elasticsearch.txt", + "books/ElectronGettingStarted.txt", + "books/Electron in Action.txt", + "books/ElectronQuickIntro.txt", + "books/Elegant_SciPy.txt", + "books/Elixir in Action.txt", + "books/Elm Accelerated - James Porter.txt", + "books/Eloquent_JavaScript.txt", + "books/ELS2015.txt", + "books/embeddedvision.txt", + "books/Emergent Web Intelligence Advanced Information Retrieval.txt", + "books/Emergent Web Intelligence Advanced Semantic Technologies.txt", + "books/Enterprise_Big_Data_Lake (1).txt", + "books/EntityFrameworkCodeFirst.txt", + "books/EntityFrameworkCore.txt", + "books/entrepreneur revolution.txt", 
+ "books/Eric Ries - The Lean Startup.txt", + "books/ESLII_print10.txt", + "books/Essential JavaScript Design Patterns.txt", + "books/eurosys10-boom 2.txt", + "books/eurosys10-boom.txt", + "books/expert-javascript.txt", + "books/expertpythonprogramming - Unknown.txt", + "books/Exploring Design Pattern For Dummies.txt", + "books/express-in-action.txt", + "books/Facebook - A Focus on Efficieny.txt", + "books/fashioning-data.txt", + "books/fcdae.txt", + "books/Feature Engineering for Machine Learning - Principles and Techniques for Data Scientists.txt", + "books/FlaskReleaseMarch03-2017.txt", + "books/flaskwebdevelopment.txt", + "books/Fluent_Python.txt", + "books/Foundations_for_Analytics_with_Python.txt", + "books/Foundations for Architecting Data Solutions - Managing Successful Data Projects.txt", + "books/Framing-Analytics-Requirements-v5.13.txt", + "books/FRIED_Jason_-_Rework.txt", + "books/FullStackJsDevelopmentWithMEAN.txt", + "books/Fundamentals of Data Visualization - A Primer on Making Informative and Compelling Figures.txt", + "books/gamedevelopmentusingpython.txt", + "books/GameProgrammingForKids.txt", + "books/Gaussian Processes for Machine Learning - Carl Edward Rasmussen.txt", + "books/Getting_Data_Right_Ch04_PE_Tamr.txt", + "books/getting-started-kubernetes-2nd.txt", + "books/GettingStartedWithASP.Net4.5WebForms.txt", + "books/Getting Started with Kubernetes.txt", + "books/Getting Started with Kudu - Jean-Marc Spaggiari.txt", + "books/GettingStartedWithLINQPad.txt", + "books/GettingStartedWithReactJs.txt", + "books/Getting Started with RStudio.txt", + "books/Getting Started with Storm.txt", + "books/Getting Started with TensorFlow.txt", + "books/Git - Giant Undo Button.txt", + "books/Git Internals.txt", + "books/GitInternals.txt", + "books/Git Internals - Unknown.txt", + "books/Global UX Design and Research in a Connected World.txt", + "books/GNU_C_LibraryReferenceManual.txt", + "books/GNULinuxAdvancedAdminstration.txt", + 
"books/go-building-web-applications.txt", + "books/go-design-patterns.txt", + "books/go-in-action.txt", + "books/go-in-practice.txt", + "books/go-programming-blueprints-2nd.txt", + "books/Go Recipes.txt", + "books/go.txt", + "books/go-web-programming.txt", + "books/Graph Algorithms - Practical Examples in Apache Spark and Neo4j.txt", + "books/Graph Databases - Ian Robinson, Jim Webber.txt", + "books/Graph Databases.txt", + "books/GraphDatabases.txt", + "books/Graphics of Large Datasets.txt", + "books/greppocketref.txt", + "books/Grokking Deep Learning.txt", + "books/GrowthHacking.txt", + "books/gsl_stats.txt", + "books/Hackers and Painters.txt", + "books/Hadoop in the Enterprise - Architecture - A Guide to Successful Integration.txt", + "books/Hadoop_Security.txt", + "books/Hadoop_ The Definitive Guide - Tom White.txt", + "books/Hadoop - The Definitive Guide.txt", + "books/hadoop-what-you-need-to-know.txt", + "books/hadoop-with-python.txt", + "books/HAL.txt", + "books/Handbook_Pt1.txt", + "books/Handbook_Pt2.txt", + "books/Handbook_Pt3.txt", + "books/Handbook_Pt4.txt", + "books/Hands-On Design Patterns with React Native - Mateusz Grzesiukiewicz.txt", + "books/hands-onfullstackdevelopmentwithspringboot20andreact.txt", + "books/hands-onfullstackwebdevelopmentwithangular6andlaravel5.txt", + "books/Hands-on Machine Learning with Scikit-Lear - Aurelien Geron.txt", + "books/Hands-On Machine Learning with Scikit Learn and TensorFlow - Concepts, Tools, and Techniques to Build Intelligent Systems.txt", + "books/Hands_On_Machine_Learning_with_Scikit_Learn_and_TensorFlow.txt", + "books/Hands On Machine Learning with Scikit Learn, Keras, and Tensorflow - Concepts, Tools, and Techniques to Build Intelligent Systems (Updated Release).txt", + "books/hdlwithdigitaldesign.txt", + "books/HeadFirstC.txt", + "books/Healing With Herbs and Spices_ Heal Your B - Simone McGrath.txt", + "books/HelloAndroid.txt", + "books/highperformanceimages.txt", + "books/High Performance 
JavaScript.txt", + "books/High_Performance_Mobile_Web.txt", + "books/HowToBuildAndScaleWithMicroServices.txt", + "books/HowToBuildAndScaleWithMicroServices - Unknown.txt", + "books/How to Live Forever - Alok Jha.txt", + "books/How to Pass Exams - Dominic O'Brien.txt", + "books/HTML5 and JavaScript Web Apps.txt", + "books/HTML5 Architecture.txt", + "books/HTML5CanvasReference.txt", + "books/HTML5 Canvas.txt", + "books/HTML5 Cookbook.txt", + "books/HTML5 & CSS3 FOR THE REAL WORLD.txt", + "books/HTML5-Programming-Cookbook.txt", + "books/HTML5SecurityCheatSheet.txt", + "books/HTML5_Vulnerabilities.txt", + "books/htmlcss2sample.txt", + "books/HTTP - 2 in Action.txt", + "books/human javascript - Henrik Joreteg.txt", + "books/Human JavaScript.txt", + "books/Identity and Data Security for Web Development Best Practices.txt", + "books/I Heart Logs Event Data, Stream Processing, and Data Integration.txt", + "books/Information Architecture For the Web and Beyond.txt", + "books/Information_Architecture_Fourth_Edition.txt", + "books/Information Theory, Inference, and Learning Algorithms .txt", + "books/Innovations in Classification, Data Science, and Information Systems.txt", + "books/Interactive Data Visualization for the Web.txt", + "books/Interactive_Data_Visualization_for_the_Web.txt", + "books/Interactive Data Visualization for the Web - Unknown.txt", + "books/Interview Preparations Kit - Software Engineer.txt", + "books/IntouchScriptingAndLogicGuide.txt", + "books/Introducing-Go.txt", + "books/introducingregularexpressions.txt", + "books/introduction-machine-learning-python.txt", + "books/introductionto3dgameprogrammingwithdirectx12.txt", + "books/Introduction to Docker.txt", + "books/IntroductionToDocker.txt", + "books/IntroductionToLinux.txt", + "books/IntroductionToNginx.txt", + "books/IntroToCrypto.txt", + "books/InventYourOwnGamesWithPython.txt", + "books/IPSUR.txt", + "books/ISLR Fourth Printing.txt", + "books/Java2.txt", + "books/JavaDesignPatterns.txt", + 
"books/JavaDevelopmentOnLnx.txt", + "books/JavaFXCookBook.txt", + "books/Java-JDBC.txt", + "books/JavaMultithreadingAndConcurrency.txt", + "books/JavaNIOCookbook.txt", + "books/JavaPersistenceAPI.txt", + "books/JavaScript A Beginners Guide .txt", + "books/JavaScript Cookbook.txt", + "books/JavaScriptInterviewQuestions.txt", + "books/JavaScript Patterns.txt", + "books/JavaScript_Succinctly.txt", + "books/JavaScript The Definitive Guide.txt", + "books/javascript_the_good_parts.txt", + "books/JavaScript The Good Parts.txt", + "books/JavaScript Web Applications.txt", + "books/JavaStartingIntoHibernate.txt", + "books/Java-ThinkJava.txt", + "books/Jenkins 2 - Up and Running - Evolve Your Deployment Pipeline for Next Generation Automation.txt", + "books/jenkins-the-definitive-guide.txt", + "books/JQueryHost.txt", + "books/JQueryNoviceToNinja.txt", + "books/jQuery_Succinctly.txt", + "books/JsNoviceToNinja2ndEdition.txt", + "books/JsNoviceToNinja.txt", + "books/Jurans Quality Handbook.txt", + "books/Kafka Streams in Action - Real time apps and microservices with the Kafka Streaming API.txt", + "books/Kafka - The Definitive Guide - Real Time Data and Stream Processing at Scale.txt", + "books/Kafka - The Definitive Guide.txt", + "books/Kubernetes Cookbook.txt", + "books/Kubernetes in Action.txt", + "books/Kubernetes Management Design Patterns With Docker, CoreOS Linux, and Other Platforms.txt", + "books/Kubernetes Microservices with Docker.txt", + "books/Kubernetes-Microservices with Docker.txt", + "books/Kuhn_Johnson_Applied_Predictive_Modeling.txt", + "books/LaBibliaDeMySQL.pdf.txt", + "books/lazy-analysts-guide-to-faster-sql.txt", + "books/Lean_Analytics.txt", + "books/Lean Customer Development.txt", + "books/Lean Enterprise.txt", + "books/Lean UX.txt", + "books/Learn Functional Programming by Implementing SQL with Underscore.js Presentation.txt", + "books/Learning_Agile.txt", + "books/Learning Apache Kafka.txt", + "books/Learning Apache Kafka - Unknown.txt", + 
"books/Learning Chaos Engineering - Russ Miles.txt", + "books/learningconcurrencyinpython - Unknown.txt", + "books/Learning Docker.txt", + "books/Learning ELK Stack - Build mesmerizing visualizations, analytics, and logs from your data using Elasticsearch, Logstash, and Kibana.txt", + "books/learninggnuemacs_3rdedition.txt", + "books/learninggraphql.txt", + "books/LearningJavaByBuildingAndroidGames.txt", + "books/Learning.Java_Oreilly_4th.Edition_Jun.2013.txt", + "books/Learning Java - Patrick Niemeyer.txt", + "books/Learning JavaScript Design Patterns.txt", + "books/learningjavascript.txt", + "books/Learning Java.txt", + "books/learningjquery3.txt", + "books/learningnodejsdevelopment.txt", + "books/learningphpmysqlandjavascript.txt", + "books/Learning Python, 5th Edition.txt", + "books/Learning Python - Mark Lutz.txt", + "books/Learning Python - Powerful Object-Oriented Programming.txt", + "books/LearningPython.txt", + "books/learningreact1.txt", + "books/learningroboticsusingpython - Unknown.txt", + "books/Learning Single-page Web Application Development.txt", + "books/Learning Spark.txt", + "books/Learning_Spark.txt", + "books/Learning_Swift.txt", + "books/learningthebashshell_3rdedition.txt", + "books/learningtheviandvimeditors_7thedition.txt", + "books/Learning Website Development with Django.txt", + "books/learnqt5.txt", + "books/lecture-22.txt", + "books/Linear Algebra Explained In Four Pages.txt", + "books/Linear Algebra.txt", + "books/Linked Data - Evolving The Web Into A Global Data Space.txt", + "books/Linked Open Data - The Essentials.txt", + "books/Linux Bible.txt", + "books/Linux Colección completa (2004).txt", + "books/LinuxCommandLineSheet.txt", + "books/LinuxCookBook.pdf - Pankaj Kumar.txt", + "books/linuxdevicedrivers.txt", + "books/LinuxEmbeddedDevelopment.txt", + "books/Linux From Scratch.txt", + "books/LinuxFromScratch.txt", + "books/linuxinanutshell.txt", + "books/Linux Internals_ Como funciona - Daniel Ezquerra.txt", + "books/LinuxKali.txt", 
+ "books/LinuxNetworkingCookbook.txt", + "books/linuxpocketguide_3rdedition.txt", + "books/Linux Pocket.txt", + "books/LinuxPracticalSecurityCookBook.txt", + "books/LinuxShellScripting.txt", + "books/linuxsystemprogramming.txt", + "books/LittleInferenceBook.txt", + "books/Machine Learning Cheat Sheet.txt", + "books/Machine learning for hackers.txt", + "books/Machine Learning for Hackers.txt", + "books/Machine_Learning_with_R_Second_Edition.txt", + "books/Machine Learning with Spark.txt", + "books/Machine Learning with TensorFlow.txt", + "books/Maintainable JavaScript.txt", + "books/Making Isometric Social Real-Time Games with HTML5 CSS3 and JavaScript.txt", + "books/Management 3.0; Leading Agile Developers, - Jurgen Appelo.txt", + "books/ManualDePowerBuilder.txt", + "books/ManualDeSEO.txt", + "books/mapping-big-data.txt", + "books/MapReduce Design Patterns - Building Effective Algorithms and Analytics for Hadoop and Other Systems.txt", + "books/Mastering-Advanced-Analytics-With-Apache-Spark.txt", + "books/Mastering_Dart__Master_the_art_of.txt", + "books/Mastering ElasticSearch - Extend your knowledge on ElasticSearch, and querying and data handling, along with its internal workings.txt", + "books/masteringios12programming.txt", + "books/Mastering Kubernetes.txt", + "books/masteringmodularjavascript.txt", + "books/Mastering Modular JavaScript.txt", + "books/masteringpythonnetworking - Unknown.txt", + "books/masteringpython - Unknown.txt", + "books/masteringregularexpressions.txt", + "books/Mastering Regular Expressions.txt", + "books/Mastering Web Application Development with Express.txt", + "books/masteringxamarinuidevelopment.txt", + "books/mesos-in-action.txt", + "books/microservices-building-scalable-software.txt", + "books/microservices-deployment-cookbook.txt", + "books/Microservices Designing Deploying.txt", + "books/microservices-docker-microsoft-azure.txt", + "books/microservices-flexible-software-architecture.txt", + "books/microservices-from-day-one.txt", 
+ "books/Microservices Patterns - With examples in Java.txt", + "books/microsoftaccess2019programmingwithvbaxmlandasp.txt", + "books/microsoftexcel2019programmingwithvbaxmlandasp.txt", + "books/microsoftexcelfunctionsandformulas_5e.txt", + "books/microsoftoffice2013_365andbeyond.txt", + "books/Mining Business Databases.txt", + "books/Mining of Data with Complex Structures.txt", + "books/Mining of Massive Datasets.txt", + "books/Mining Text Data.txt", + "books/Mining_the_Social_Web__Second_Edition (1).txt", + "books/Mining_the_Social_Web__Second_Edition.txt", + "books/Mining the Social Web.txt", + "books/Modeling With Data.txt", + "books/Modern Java in Action - Lambda, streams, functional and reactive programming.txt", + "books/ModernJs.txt", + "books/modernpythoncookbook - Unknown.txt", + "books/MongoDB3.txt", + "books/MongoDB - Applied Design Patterns, Practical Use Cases with the Leading NoSQL Database.txt", + "books/MongoDB Applied Design Patterns - Rick Copeland.txt", + "books/MongoDB Cookbook.txt", + "books/MongoDB - The Definitive Guide.txt", + "books/MongoDBTheDefinitiveGuide.txt", + "books/Monitoring with Graphite - Jason Dixon.txt", + "books/msexcel2016.txt", + "books/multimediawebdesignanddevelopment.txt", + "books/MySQLPluginDevelopmen.txt", + "books/native-docker-clustering-swarm.txt", + "books/Natural Language Annotation for Machine Learning.txt", + "books/Natural_Language_Annotation_for_Machine_Learning.txt", + "books/Natural Language Processing in Action - Understanding, analyzing, and generating text with Python.txt", + "books/Natural Language Processing with PyTorch - Build Intelligent Language Applications Using Deep Learning.txt", + "books/negron-muntaner-jennifers-butt.txt", + "books/NetworkProgrammingIndotNET.txt", + "books/Network_Security_Through_Data_Analysis.txt", + "books/New Trends in Computational Collective Intelligence.txt", + "books/Nodedotjs_Web_Development_Third_Edition.txt", + "books/Node for Front-End Developers.txt", + 
"books/NodeJsAdvancedGuide.txt", + "books/Node.js By Example.txt", + "books/Node.js Design Patterns.txt", + "books/Node.js in Action.txt", + "books/Node.js Recipes.txt", + "books/Node.js the Right Way.txt", + "books/NodeJs.txt", + "books/Node Up and Running.txt", + "books/Node- Up and Running.txt", + "books/NoSQLArchitectsGuide.txt", + "books/NoSQL Database Technology - A Survey and Comparison of Systems.txt", + "books/OraclePL-SQL3Edition.txt", + "books/OraclePL-SQL.txt", + "books/Oreilly.Beautiful.Data.Jul.2009.txt", + "books/O'Reilly Media -- Template for Microsoft W - na na.txt", + "books/OReilly.REST.API.Design.Rulebook.Oct.2011.ISBN.1449310508.txt", + "books/OReilly Twisted Network Programming Essentials 2nd Edition 2013.txt", + "books/Organizational_Profiles.txt", + "books/out-of-the-tar-pit.txt", + "books/PatternDesignInC++WithQt4.txt", + "books/PHP-And-MySql-NoviceToNinja.txt", + "books/Postgres.txt", + "books/Practical Cloud Security - A Guide for Secure Design and Deployment.txt", + "books/practicaldatacleaning.txt", + "books/Practical Machine Learning Tools and Techniques.txt", + "books/Practical Machine Learning.txt", + "books/Practical Node.js.txt", + "books/Practical Recommender Systems.txt", + "books/Practical Semantic Web and Linked Data Applications.txt", + "books/Practical_Statistics_for_Data_Scientists.txt", + "books/PrincipiosDeCompiladores1EraEdicion.txt", + "books/Principles of Data Quality.txt", + "books/Privacy and Big Data.txt", + "books/ProbStatBook.txt", + "books/pro-continuous-delivery-jenkins-2.txt", + "books/pro-docker.txt", + "books/Production Ready Microservices.txt", + "books/Pro Express.js.txt", + "books/Professional Node.js.txt", + "books/proGit.txt", + "books/Pro GIT.txt", + "books/ProgramacionEnC.txt", + "books/Programmable Logic Controller - Basic Prin - Lab-Volt.txt", + "books/Programming Hive - Edward Capriolo, Dean Wampler.txt", + "books/Programming HTML5 Applications.txt", + "books/Programming Kubernetes - michael 
Hausenblas.txt", + "books/Programming_Pig_Second_Edition.txt", + "books/Programming_Scala_Second_Edition.txt", + "books/Programming The Semantic Web.txt", + "books/Pro HTML5 Programming.txt", + "books/Pro JavaScript Design Patterns.txt", + "books/Pro .NET 2.0 Graphics Programming.txt", + "books/Pro Node.js for Developers.txt", + "books/Pro React.txt", + "books/Pro REST API Development with Node.js.txt", + "books/pro-vim-2014.txt", + "books/pynput.txt", + "books/py-quant-econ.txt", + "books/Python3CookBook.txt", + "books/python3_pocketprimer.txt", + "books/PythonBeginnerCheatSheet.txt", + "books/Python Cookbook, 2nd Edition.txt", + "books/Python Cookbook, 3rd Edition.txt", + "books/python-crash-course.txt", + "books/pythondataanalysiscookbook - Unknown.txt", + "books/Python Data Science Essentials.txt", + "books/pythondatascienceessentials - Unknown.txt", + "books/pythondatastructuresandalgorithms - Unknown.txt", + "books/Python Essential Reference.txt", + "books/PythonEssentialsCheatSheet.txt", + "books/Python for Data Analysis.txt", + "books/Python_for_Finance.txt", + "books/PythonGamesDevelopmentForBeginners.txt", + "books/Python GUI Programming Cookbook - Second Edition.txt", + "books/pythonguiprogrammingcookbook - Unknown.txt", + "books/Python GUI programming with Tkinter ( PDFDrive.com ) (2).txt", + "books/pythonhighperformance - Unknown.txt", + "books/Python_introduction.txt", + "books/Python Machine Learning Blueprints.txt", + "books/Python Machine Learning.txt", + "books/Python_Machine_Learning.txt", + "books/pythonmachinelearning - Unknown.txt", + "books/PythonMakingGamesWithPygame.txt", + "books/pythonmicroservicesdevelopment - Unknown.txt", + "books/Python-NetworkHacks.txt", + "books/python-pocket-reference-5th-edition.txt", + "books/pythonprogrammingwithraspberrypi - Unknown.txt", + "books/PythonTestingBeginnerGuide.txt", + "books/Python.Tkinter.Programming.txt", + "books/python-tricks.txt", + "books/PyWebScrapingBook.txt", + "books/Qt5 Python GUI 
Programming Cookbook_ Building responsive and powerful cross-platform applications with PyQt ( PDFDrive.com ).txt", + "books/quality-code-software-testing-principles-practices-and-patterns.txt", + "books/radziwill_statisticseasierwithr_preview.txt", + "books/randomforest2001.txt", + "books/R Cookbook - JD Long.txt", + "books/R_Cookbook.txt", + "books/R Data Structures and Algorithms.txt", + "books/R Deep Learning Cookbook.txt", + "books/reactandreactnative.txt", + "books/reactdesignpatternsandbestpractices.txt", + "books/Reactive Applications with Akka.Net.txt", + "books/Reactive Design Patterns.txt", + "books/ReactJs.txt", + "books/reactnativecookbook_ward.txt", + "books/React Native in Action.txt", + "books/REACT.txt", + "books/Real_Time_Big_Data_Analytics.txt", + "books/Real-World_Hadoop_MapR.txt", + "books/Redis Essentials.txt", + "books/Redis Essentials - Unknown.txt", + "books/Redis in Action.txt", + "books/Refactoring Improving the Design of Existing Code.txt", + "books/Regular Expression Pocket Reference.txt", + "books/Regular Expression Pocket Reference - Unknown.txt", + "books/Regular Expressions Cookbook.txt", + "books/RegularExpressions_Succinctly.txt", + "books/Relevant Search_ With applications for Sol - Doug Turnbull John Berryman.txt", + "books/Relevant Search - With applications for Solr and Elasticsearch.txt", + "books/ResponsiveDesign.txt", + "books/Responsive Web Design.txt", + "books/Responsive Web Design with HTML5 and CSS3.txt", + "books/rest-advanced-research-topics-and-practical-applications.txt", + "books/RESTful Java Patterns and Best Practices.txt", + "books/RESTful Java Web Services Security.txt", + "books/RESTful Java with JAX-RS 2.0, 2nd Edition.txt", + "books/RESTful Web API Design with Node.js.txt", + "books/RESTful Web APIs.txt", + "books/RESTful Web Clients - Enabling Reuse Through Hypermedia.txt", + "books/RESTful_Web_Services.txt", + "books/RESTful Web Services with Dropwizard.txt", + 
"books/Rexer_Analytics_2013_Data_Miner_Survey_Summary_Report.txt", + "books/RFP Proyecto CRM - Herve Cayard.txt", + "books/R_in_Action_Second__v15_MEAP.txt", + "books/R in a Nutshell, 2nd Edition.txt", + "books/Roy Cohn Part 01 of 01.txt", + "books/R_Packages.txt", + "books/R_ProgrammingSuccinctly.txt", + "books/rprogramming.txt", + "books/running-lean-iterate-from-plan-a-to-a-plan-that-works-lean-series.txt", + "books/RW.txt", + "books/Rxjs in Action.txt", + "books/Scala_Cookbook.txt", + "books/scala-test-driven-development.txt", + "books/Schema Matching and Mapping.txt", + "books/Secrets of the JavaScript Ninja.txt", + "books/Securing Devops - Safe Services in the Cloud.txt", + "books/sedandawk.txt", + "books/Semantic Web for the Working Ontologist.txt", + "books/Semantic Web for the Working Ontologist - Unknown.txt", + "books/Semantic Web Programming.txt", + "books/Semantic Web Services For Web Databases.txt", + "books/Semantic Web Services.txt", + "books/Semantic Web Technologies for Business Intelligence.txt", + "books/Site Reliability Engineering - How Google Runs Production Systems.txt", + "books/Slides - Communicating to Company.txt", + "books/Slides - How to Market.txt", + "books/Slides - How to Turn Feature Ideas Into User Stories.txt", + "books/Slides - Talking to Customers.txt", + "books/Slides - User Stories to Actual Features.txt", + "books/Slides - What do Product Managers Do.txt", + "books/Slides - What I Did As a Product Manager.txt", + "books/Slides - What Is Agile Development.txt", + "books/Slides - WhosOnTheTeam.txt", + "books/Slides - Working With Developers.txt", + "books/SLS_corrected_1.4.16.txt", + "books/SmashingNodeJs.txt", + "books/socc2012_bloom_lattices.txt", + "books/Social Data Mining.txt", + "books/softwarearchitecturewithpython - Unknown.txt", + "books/SoftwareDesignPatterns.txt", + "books/software-paradox.txt", + "books/software takes command.txt", + "books/softwaretestingprinciplesandpractices.txt", + "books/SoftwareTesting.txt", 
+ "books/SolidPrinciples.txt", + "books/S.O.L.I.D_Principles.txt", + "books/Spark in Action.txt", + "books/Spark - The Definitive Guide - Big Data Processing Made Simple.txt", + "books/spatialEpiBook.txt", + "books/Speed Reading for Professionals - Mantesh.txt", + "books/spring5designpatterns.txt", + "books/SQL.Cookbook.2005.txt", + "books/StartingIntoAzure.txt", + "books/StartingIntoCouchDB.txt", + "books/StartingIntoGIT.txt", + "books/StartingIntoHTML5.txt", + "books/StartingIntoIonic.txt", + "books/StartingIntoMySQL.txt", + "books/StartingIntoNodeJs.txt", + "books/StartingIntoPHPEnvironment.txt", + "books/StartingIntoPLC_Programming.txt", + "books/StartingIntoXamarinForms.txt", + "books/StartUpBestPractices.txt", + "books/steve_jobs_walter_isaacson.txt", + "books/Streaming Data - Understanding the Real Time Pipeline.txt", + "books/Stunning CSS3 A project-based guide to the latest in CSS.txt", + "books/Swift.txt", + "books/t._cormen_-_introduction_to_algorithms_3rd_edition.txt", + "books/tdd-ebook-sample.txt", + "books/tensorflow2.txt", + "books/TensorFlow for Machine Intelligence - A Hands-On Introduction to Learning Algorithms.txt", + "books/TensorFlow Machine Learning Cookbook.txt", + "books/Testable JavaScript.txt", + "books/Test-Driven JavaScript Development 2.txt", + "books/Test Driven.txt", + "books/Testing Angular Applications.txt", + "books/Text Mining Classification, Clustering, and Applications.txt", + "books/TextMiningO.txt", + "books/TheArt&ScienceOfJS.txt", + "books/The Bastard Operator From Hell.txt", + "books/TheBeginnersGuideToNoSQL.txt", + "books/The Clean Coder - A Code of Conduct for Professional Programmers.txt", + "books/The CSS3 Anthology.txt", + "books/The Dart Programming Language.txt", + "books/The Data Analytics Handbook.txt", + "books/The Data Science Book.txt", + "books/The Design of Everyday Things.txt", + "books/The DevOps 2.0 Toolkit - Automating the Continuous Deployment Pipeline with Containerized Microservices.txt", + "books/The 
DevOps Adoption Playbook - A Guide to Adopting DevOps in a Multi-Speed IT Enterprise.txt", + "books/the-docker-book.txt", + "books/The Elements of Statistical Learning - Data Mining, Inference, and Prediction.txt", + "books/The Elements of Statistical Learning.txt", + "books/The Enterprise Big Data Lake - Delivering the Promise of Big Data and Data Science.txt", + "books/The Essential Guide to User Interface Design.txt", + "books/The_GNU_Debbuger.txt", + "books/The.Go.Programming.Language.txt", + "books/The Grammar of Graphics.txt", + "books/TheGuideToWireFraming.txt", + "books/The Laws of Simplicity.txt", + "books/the-lean-mindset-ask-the-right-questions.txt", + "books/The Lean Startup - How Today's Entrepreneurs Use Continuous Innovation to Create Radically Successful Businesses.txt", + "books/TheLinuxDevelopmentPlatform.txt", + "books/TheLinuxKernelModuleProgrammingGuid.txt", + "books/TheLinuxProgrammingInterface.txt", + "books/The Lion Way - Machine Learning plus Intelligent Optimization.txt", + "books/The Meaning of Tingo_ And Other Extraordin - Adam Jacot De Boinod.txt", + "books/The Minto Pyramid Principle - Logic in Writing, Thinking, & Problem Solving.txt", + "books/The Pragmatic Programmer From Journeyman to Master.txt", + "books/The Pragmatic Programmer.txt", + "books/The Principles of Beautiful Web Design.txt", + "books/The Site Reliability Workbook - Practical Ways to Implement SRE.txt", + "books/The Startup Owner s Manual_ The Step-by-Step Guide for Building a Great Company - Blank, Steve.txt", + "books/TheUltimateGuideToPrototyping.txt", + "books/Think Bayes - Bayesian Statistics Made Simple.txt", + "books/thinkbayes.txt", + "books/thinkcomplexity.txt", + "books/Think Like a Data Scientist. Tackle the data science process step by step.txt", + "books/Think Like a Programmer - An Intro. to Creative Problem Solving - V. 
Spraul (No Starch, 2012) BBS.txt", + "books/thinkpython.txt", + "books/Think Python.txt", + "books/thinkstats2.txt", + "books/Think Stats - Allen B. Downey.txt", + "books/Think Stats - Exploratory Data Analysis in Python.txt", + "books/thinkstats.txt", + "books/Think Stats.txt", + "books/Third-Party JavaScript.txt", + "books/tmux-taster-2014.txt", + "books/Transactions on Computational Collective I - Ngoc Thanh Nguyen (Editor).txt", + "books/Transactions on Computational Collective Intelligence III.txt", + "books/Transactions on Computational Collective Intelligence II.txt", + "books/Transactions on Computational Collective Intelligence I.txt", + "books/Transactions on Computational Collective Intelligence V.txt", + "books/Twitter_Bootstrap3_Succinctly.txt", + "books/TypeScript Design Patterns.txt", + "books/TypeScript.txt", + "books/UbuntuServerGuide.txt", + "books/understanding-chief-data-officer.txt", + "books/Understanding Computation - From Simple Machines to Impossible Programs.txt", + "books/UnderstandingDocker.txt", + "books/UnderstandingLinuxKernel3erEdition.txt", + "books/Understanding the Chief Data Officer - Unknown.txt", + "books/UnityGameDevelopment.txt", + "books/university-startups-and-spin-offs-guide-for-entrepreneurs-in-academia.txt", + "books/unixpowertools.txt", + "books/Unknown - Unknown.txt", + "books/User Interface Design for Programmers.txt", + "books/User Story Mapping - Discover the Whole Story, Build the Right Product.txt", + "books/using-asyncio-python-understanding-asynchronous.txt", + "books/Using AWS Lambda and Claudia.js.txt", + "books/using-docker.txt", + "books/Using Node.js for UI Testing.txt", + "books/usingsvgwithcss3andhtml5.txt", + "books/UX for Leaan Startups.txt", + "books/UX_Strategy.txt", + "books/VBAProfessionalTipsSecrets.txt", + "books/Version Control by Example.txt", + "books/Visualizing Data.txt", + "books/vuejs2designpatternsandbestpractices.txt", + "books/VueJs2.txt", + "books/vuejs_upandrunning.txt", + "books/Web 
/// Returns a copy of `content` containing only the characters on the allow-list.
///
/// The allow-list holds the ASCII digits, the lowercase ASCII letters, space,
/// newline and the punctuation `.`, `,`, `!`, `?`. Everything else is dropped,
/// which includes uppercase letters, tabs and all non-ASCII characters.
fn clean_content(content: &str) -> String {
    // The literal is kept exactly as it was (the repeated `1` does not change the
    // set of accepted characters).
    let allowed_chars = "01234567891abcdefghijklmnopqrstuvwxyz \n.,!?";

    content
        .chars()
        .filter(|&c| allowed_chars.contains(c))
        .collect::<String>()
}
.map(|s| s.trim_start()) // Remove leading whitespace + .count(); + +// // Remove last "sentence" if didn't end with a dot +// if let Some(last) = sentences.last() && !last.ends_with('.') { +// sentences.pop(); +// } + + sentences +} + +fn get_words(content: &str, words: &mut usize, caps: &mut usize, fw: &mut usize) { + fn check_forbidden(w: &str) -> bool { + FORBIDDEN_WORDS.iter() + .find(|fw| str::eq_ignore_ascii_case(w, fw)) + .is_some() + } + + for word in content.split_whitespace() { + *words += 1; + + if is_fully_capitalized_word(word) { + *caps += 1; + } + if check_forbidden(word) { + *fw += 1; + } + } +} + +fn is_fully_capitalized_word(word: &str) -> bool { + word.chars() + .all(|c| !c.is_ascii_alphanumeric() || c.is_ascii_uppercase()) +} + +//fn get_capitalized_words(content: &str) -> usize { +// let sentences = get_sentences(content); +//// let mut cap_words = vec![]; +// let mut count = 0; +// +// for sentence in sentences { +// // Always skip the first word since sentences start with +// for word in get_words(sentence).skip(1) { +// if is_fully_capitalized_word(word) { +// count += 1; +// } +// } +// } +// +// count +//} + +fn get_numbers(clean_content: &str) -> usize { + clean_content.split(|c: char| !c.is_ascii_digit()) + .count() +} + +//fn get_forbidden_words(content: &str) -> usize { +// fn check_forbidden(w: &str) -> bool { +// FORBIDDEN_WORDS.iter() +// .find(|fw| str::eq_ignore_ascii_case(w, fw)) +// .is_some() +// } +// +// get_words(content) +// .filter(|w| check_forbidden(w)) +// .collect() +//} + +fn analyze(data: &str) { + let clean_data = clean_content(data); +// drop(clean_data); // You aren't actually using clean_data :O + + // All capitalized words + let mut words = 0; + let mut fw = 0; + let mut cap_words = 0; + get_words(&clean_data, &mut words, &mut fw, &mut cap_words); + + println!("All capitalized words: {}", cap_words); + + // All sentences + let sentences = get_sentences(data); + println!("Sentences: {}", sentences); + + // All 
words + println!("Words: {}", words); + + // Numbers + let numbers = get_numbers(&clean_data); + println!("Numbers: {}", numbers); + + // Forbidden words + println!("Forbidden words: {}", fw); + + if sentences > 0 { + let word_count_per_sentence = words / sentences; + println!("Word count per sentence: {}", word_count_per_sentence); + } +} + +fn main() { + // Read in files from args + let mut files = Vec::with_capacity(env::args().len()); + let mut do_parallel = false; + + for arg in env::args().skip(1) { // skip program arg + if arg == "-p" { + do_parallel = true; + } else { + files.push(arg); + } + } + + // Do the work + let work = |file| { + let Ok(text) = fs::read_to_string(&file) else { + eprintln!("{file} isn't a valid file or couldn't be read"); + return; + }; + + analyze(&text); + }; + + if !do_parallel { + files.iter().for_each(work); + } else { + files.par_iter().for_each(work) + } +} + +static FORBIDDEN_WORDS: &'static [&'static str] = &[ + "recovery", "techie", "http", "https", "digital", "hack", "::", "//", "com", + "@", "crypto", "bitcoin", "wallet", "hacker", "welcome", "whatsapp", "email", "cryptocurrency", + "stolen", "freeze", "quick", "crucial", "tracing", "scammers", "expers", "hire", "century", + "transaction", "essential", "managing", "contact", "contacting", "understanding", "assets", "funds" +]; diff --git a/12bitfloat_rust/risspam/src/main_pre_monoio.rs b/12bitfloat_rust/risspam/src/main_pre_monoio.rs new file mode 100644 index 0000000..28bfc44 --- /dev/null +++ b/12bitfloat_rust/risspam/src/main_pre_monoio.rs @@ -0,0 +1,828 @@ +#![feature(likely_unlikely)] + +mod books; + +use crate::books::FULL_BOOK_PATHS; +use core_affinity::CoreId; +use memmap2::Mmap; +use rayon::prelude::*; +use std::cell::OnceCell; +use std::cell::RefCell; +use std::ffi::{OsStr, OsString}; +use std::fs::File; +use std::mem::MaybeUninit; +use std::ops::Deref; +use std::os::linux::raw::stat; +use std::sync::Mutex; +use std::thread::available_parallelism; +use 
std::time::{Duration, Instant}; +use std::{array, env, fs, hint, mem, process, thread}; +use std::io::Read; +use libc::{aio_read, aiocb}; + +#[inline] +fn is_ascii_whitespace(b: u8) -> bool { + matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') +} + +#[inline] +fn is_ascii_upper(b: u8) -> bool { + matches!(b, b'A'..=b'Z') +} + +#[inline] +fn is_ascii_digit(b: u8) -> bool { + matches!(b, b'0'..=b'9') +} + +#[repr(align(128))] +#[derive(Copy, Clone)] +struct Stats { + pub sentences: u32, + pub words: u32, + pub capitalizeds: u32, + pub numbers: u32, + pub forbiddens: u32, +} + +static TIME_SPENT_READING_FILES: Mutex = Mutex::new(Duration::from_secs(0)); + +const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024; +thread_local! { + static WORK_STATE: RefCell = RefCell::new(WorkState::new()); +} + +pub struct WorkState { + pub work_mem: Box<[u8]>, +// pub io_mem: Box<[u8]>, +// pub curr_read: Option, +// pub had_first_load: bool, +} + +impl WorkState { + pub fn new() -> Self { + Self { + work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), +// io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), +// curr_read: None, +// had_first_load: false, + } + } +} + +fn work(file_path: &OsStr, stats: &mut Stats) { + WORK_STATE.with_borrow_mut(|state: &mut WorkState| { +// // Load file +// let start_time = Instant::now(); + +// let Ok(text) = fs::read(file_path) else { +// eprintln!("invalid file!"); +// process::abort(); +// }; + + let mut file = File::open(file_path).unwrap(); + let file_len = file.metadata().unwrap().len() as usize; + file.read_exact(&mut state.work_mem[..file_len]).unwrap(); + let text = &state.work_mem[..file_len]; + + unsafe { + let mut cb = mem::zeroed(); + + aio_read(&raw mut cb); + } + +// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice(); + +// let time_reading = start_time.elapsed(); +// { +// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); +// *guard += time_reading; +// } + + analyze(&text, 
stats); + }); +} + +fn analyze(text: &[u8], stats: &mut Stats) { +// // NOTE: mmap is quite a bit slower +// // Load file +// let Ok(file) = File::open(file_path) else { +// eprintln!("invalid file!"); +// std::process::abort(); +// }; +// let mmap = unsafe { +// Mmap::map(&file).unwrap() +// }; +// mem::forget(file); +// let text = &*mmap; + +// // Load file +// let start_time = Instant::now(); +// let Ok(text) = fs::read(file_path) else { +// eprintln!("invalid file!"); +// process::abort(); +// }; +// let time_reading = start_time.elapsed(); +// { +// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); +// *guard += time_reading; +// } + + let mut sentences = 0; + let mut words = 0; + let mut capitalizeds = 0; + let mut numbers = 0; + let mut forbiddens = 0; + + let mut idx = 0; + 'full_loop: loop { + // Skip whitespace + while is_ascii_whitespace(text[idx]) { + idx += 1; + if idx >= text.len() { + break 'full_loop; + } + } + + // Find end of word + let word_start = idx; + let mut has_non_upper = false; + + 'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) { + idx += 1; + if idx >= text.len() { + break 'find_word_end; + } + + // Per-char logic + if b == b'.' { + sentences += 1; + } + if !is_ascii_upper(b) { + has_non_upper = true; + } + if is_ascii_digit(b) { + numbers += 1; + } + } + + let word = &text[word_start..idx]; + +// dbg!(str::from_utf8(word).unwrap()); + + words += 1; + + if !has_non_upper { + capitalizeds += 1; + } + + // Check forbidden + if unsafe { FW_TAB.lookup(word) } { + forbiddens += 1; + } + } + + /* + for token in text.split(|&b| is_ascii_whitespace(b)) { + if token.is_empty() { + continue; + } + + words += 1; + + // Sentence count, folded into this loop + // instead of another loop (better cache usage) + for &b in token { + if b == b'.' 
{ + sentences += 1; + } + } + + // Check if upper + if token.iter().all(|&b| is_ascii_upper(b)) { + capitalizeds += 1; + } + + // Check digits + for &b in token { + if is_ascii_digit(b) { + numbers += 1; + } + } + + // Check if words +// if FORBIDDEN_WORDS.contains(&token) { +// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { + if unsafe { FW_TAB.lookup(token) } { + forbiddens += 1; + } + } + */ + + /* + // NOTE: This is pretty slow: + let mut idx = 0; + let mut word_start = 0; + let mut is_in_word = false; + let mut has_non_upper = false; + loop { + let b = unsafe { *text.get_unchecked(idx) }; + + let mut process_word = false; + if is_ascii_whitespace(b) { + if is_in_word { + process_word = true; + + // Reset state for next word + is_in_word = false; + has_non_upper = false; + } + } else { + if !is_in_word { + word_start = idx; + is_in_word = true; + } + + has_non_upper |= !is_ascii_upper(b); + } + + // Check digits + if is_ascii_digit(b) { + numbers += 1; + } + // Check sentences + if b == b'.' 
{ + sentences += 1; + } + + let word = &text[word_start..idx]; + + idx += 1; + + if process_word || idx >= text.len() { + words += 1; + if !has_non_upper { + capitalizeds += 1; + } + +// // DEBUG: +// println!("'{}'", str::from_utf8(word).unwrap()); + + if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } { + forbiddens += 1; + } + } + if idx >= text.len() { + break; + } + } + */ + + stats.sentences = sentences; + stats.words = words; + stats.capitalizeds = capitalizeds; + stats.numbers = numbers; + stats.forbiddens = forbiddens; +} + +/* +fn analyze_old(file_path: &OsStr, stats: &mut Stats) { + // Load file + let Ok(text) = fs::read(file_path) else { + eprintln!("invalid file!"); + std::process::abort(); + }; + + let mut sentences = 0; + let mut words = 0; + let mut capitalizeds = 0; + let mut numbers = 0; + let mut forbiddens = 0; + + for token in text.split(|&b| is_ascii_whitespace(b)) { + if token.is_empty() { + continue; + } + + words += 1; + + // Sentence count, folded into this loop + // instead of another loop (better cache usage) + for &b in token { + if b == b'.' 
{ + sentences += 1; + } + } + + // Check if upper + if token.iter().all(|&b| is_ascii_upper(b)) { + capitalizeds += 1; + } + + // Check digits + for &b in token { + if is_ascii_digit(b) { + numbers += 1; + } + } + + // Check if words +// if FORBIDDEN_WORDS.contains(&token) { + if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { + forbiddens += 1; + } + } + + stats.sentences = sentences; + stats.words = words; + stats.capitalizeds = capitalizeds; + stats.numbers = numbers; + stats.forbiddens = forbiddens; +} +*/ + +fn main() { + // Read in files from args + let mut files = Vec::with_capacity(env::args().len()); + let mut do_parallel = false; + + let start_time = Instant::now(); + for arg in env::args_os().skip(1) { + // skip program arg + if arg == "-p" { + do_parallel = true; + } else { + files.push(arg); + } + } + println!("[PROFILE] taking args took {:?}", start_time.elapsed()); + +// env::args_os(). + +// let files = FULL_BOOK_PATHS; + +// // Build table +// let tab = FwTab::build(); +// tab.compile(); + + // Do the work + let mut stats = vec![Stats { + sentences: 0, + words: 0, + capitalizeds: 0, + numbers: 0, + forbiddens: 0, + }; files.len()]; + + let start_time = Instant::now(); + + let num_cores = available_parallelism().unwrap().get(); + let num_threads = num_cores * 1; + + // DEBUG: + dbg!(num_threads); + dbg!(num_cores); + + rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build_global() + .unwrap(); + + files.par_iter() + .enumerate() + .for_each(|(idx, p)| { + let s = unsafe { + &mut *stats.as_ptr() + .offset(idx as isize) + .cast_mut() + }; + +// let mut path = OsString::from("../../"); +// path.push(p); + let path = p; + work(path, s); + }); + +// thread::scope(|scope| { +// let files_per_thread = files.len() / num_threads; +// +// for thread_idx in 0..num_threads { +// let capture_files = &files; +// let capture_stats = &stats; +// thread::Builder::new().spawn_scoped(scope, move || { +// let files = capture_files; +// 
let stats = capture_stats; +// +// // Set thread affinity +// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores })); +// +// // Do work +// let thread_start = thread_idx * files_per_thread; +// for i in 0..files_per_thread { +// let real_idx = thread_start + i; +// let file_path = &files[real_idx]; +// let st = unsafe { +// &mut *stats.as_ptr() +// .offset(real_idx as isize) +// .cast_mut() +// }; +// +// work(&file_path, st); +// } +// }).unwrap(); +// } +// }); + + println!("[PROFILE] processing text took {:?}", start_time.elapsed()); + + // Accumulate stats + let start_time = Instant::now(); + + let mut total_words = 0; + let mut total_capitalizeds = 0; + let mut total_sentences = 0; + let mut total_numbers = 0; + let mut total_forbiddens = 0; + + for stat in &stats { + total_words += stat.words; + total_capitalizeds += stat.capitalizeds; + total_sentences += stat.sentences; + total_numbers += stat.numbers; + total_forbiddens += stat.forbiddens; + } + + let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0; + let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0; + let word_count_per_sentence = total_words as f32 / total_sentences as f32; + + println!(); + println!("Total Words: {total_words}"); + println!("Total Capitalized words: {total_capitalizeds}"); + println!("Total Sentences: {total_sentences}"); + println!("Total Numbers: {total_numbers}"); + println!("Total Forbidden words: {total_forbiddens}"); + println!("Capitalized percentage: {capitalized_percentage:.6}"); + println!("Forbidden percentage: {forbidden_percentage:.6}"); + println!("Word count per sentence: {word_count_per_sentence:.6}"); + println!("Total files read: {}", files.len()); + + println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed()); + + println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap()); + + // Exit process to avoid running drops + 
process::exit(0); +} + +#[repr(C)] +struct FwTab { + // pub dir_and_len_bits: [u32; 256], + pub dir_len_bits: [u16; 256], + pub dir: [u8; 256], + pub strs: [u8; 256], +} + +impl FwTab { + pub fn build() -> Self { + // Sort fws by first char + let mut sorted_fws: Vec> = vec![vec![]; 256]; + + for word in FORBIDDEN_WORDS { + sorted_fws[word[0] as usize].push(&word); + } + for i in 0..256 { + sorted_fws[i].sort() + } + + // // DEBUG: + // println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::>()); + + // Build str tab + let mut fw_dir = [0u8; 256]; +// let mut fw_dir_len_bits = [0u32; 256]; + let mut fw_dir_len_bits = [0u16; 256]; + let mut fw_strs: Vec = vec![]; + + fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries + + for c in 0..256 { + for fw in FORBIDDEN_WORDS { + if c == fw[0] as usize { + fw_dir_len_bits[c] |= 0x1 << fw.len(); + } + } + + if !sorted_fws[c].is_empty() { + let sublist_start_offset = fw_strs.len().try_into().unwrap(); + fw_dir[c] = sublist_start_offset; + + // DEBUG: + println!("{c} start offset: {}", sublist_start_offset); + println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::>()); + + // Push strings + for fw in &sorted_fws[c] { + fw_strs.push(fw.len().try_into().unwrap()); + for &c in &fw[1..] 
{ + fw_strs.push(c); + } + } + + // Mark end of per-char word sublist + fw_strs.push(b'\0'); + } + } + + // DEBUG: + println!("strs len: {}", fw_strs.len()); + + assert_eq!(fw_dir.len(), 256); + assert!(fw_strs.len() <= 256); + + fw_strs.resize(256, 0); + + let tab = FwTab { + dir: fw_dir, + dir_len_bits: fw_dir_len_bits, +// dir_and_len_bits: array::from_fn(|idx| { +// (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24) +// }), + strs: fw_strs.try_into().unwrap(), + }; + + // DEBUG: Test some strings + unsafe { + dbg!(tab.lookup(b"cpm")); + dbg!(tab.lookup(b"com")); + dbg!(tab.lookup(b"coma")); + dbg!(tab.lookup(b"co")); + dbg!(tab.lookup(b"cam")); + dbg!(tab.lookup(b"crypto")); + dbg!(tab.lookup(b"@")); + dbg!(tab.lookup(b"")); + dbg!(tab.lookup(b" ")); + dbg!(tab.lookup(b"test")); + dbg!(tab.lookup(b"expers")); + } + + tab + } + + pub fn compile(&self) { + println!("static FW_TAB: FwTab = FwTab {{"); + +// println!("\tdir_and_len_bits: ["); +// for chunk in self.dir_and_len_bits.chunks(16) { +// print!("\t\t"); +// for &b in chunk { +// print!("0x{b:08x}, "); +// } +// println!(); +// } +// println!("\t],"); + + println!("\tdir: ["); + for chunk in self.dir.chunks(16) { + print!("\t\t"); + for &b in chunk { + print!("0x{b:02x}, "); + } + println!(); + } + println!("\t],"); + + println!("\tdir_len_bits: ["); + for chunk in self.dir_len_bits.chunks(16) { + print!("\t\t"); + for &b in chunk { + print!("0x{b:04x}, "); + } + println!(); + } + println!("\t],"); + + println!("\tstrs: ["); + for chunk in self.strs.chunks(16) { + print!("\t\t"); + for &b in chunk { + print!("0x{b:02x}, "); + } + println!(); + } + println!("\t],"); + + println!("}};"); + } + + #[inline] + pub unsafe fn lookup(&self, word: &[u8]) -> bool { + let &[first_char, ..] 
= word else { + return false; + }; + +// let dir_and_len_bits = unsafe { +// *self.dir_and_len_bits.get_unchecked(first_char as usize) +// }; +// if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 { +// return false; +// } + + let len_bits = unsafe { + *self.dir_len_bits.get_unchecked(first_char as usize) + }; + if word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0 { + return false; + } + +// let mut str_offset = (dir_and_len_bits >> 24) as usize; + let mut str_offset = unsafe { + *self.dir.get_unchecked(first_char as usize) as usize + }; + + // Char doesn't have any strings in the table + if str_offset == 0 { + return false; + } + + // Iterate over strs + loop { +// let fw_len = u16::from_le_bytes([ +// self.strs[str_offset], +// self.strs[str_offset+1] +// ]); + let fw_len: u8 = unsafe { + *self.strs.get_unchecked(str_offset) + }; + + if fw_len == 0 { + // We've reached the end of the word sublist + return false; + } + + // Only compare words if they are the same length + if word.len() == fw_len as usize { + // Compare strs + let mut char_offset = 1usize; + loop { + // Found the word! 
+ if char_offset == word.len() { + return true; + } + + let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) }; + let word_char = unsafe { *word.get_unchecked(char_offset) }; + + if fw_char > word_char { + // Word can't possible be in the sorted list, return + return false; + } + if fw_char < word_char { + // Try next word + break; + } + + char_offset += 1; + } + } + + // Advance to next word +// let str_len_bytes = 2; + let str_len_bytes = 1; + str_offset += (fw_len as usize - 1) + str_len_bytes; + } + } +} + +const FORBIDDEN_WORDS: [&'static [u8]; 35] = [ + b"recovery", + b"techie", + b"http", + b"https", + b"digital", + b"hack", + b"::", + b"//", + b"com", + b"@", + b"crypto", + b"bitcoin", + b"wallet", + b"hacker", + b"welcome", + b"whatsapp", + b"email", + b"cryptocurrency", + b"stolen", + b"freeze", + b"quick", + b"crucial", + b"tracing", + b"scammers", + b"expers", + b"hire", + b"century", + b"transaction", + b"essential", + b"managing", + b"contact", + b"contacting", + b"understanding", + b"assets", + b"funds", +]; + +static FW_TAB: FwTab = FwTab { + dir: [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, + 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], + dir_len_bits: [ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000, + 0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + strs: [ + 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, + 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, + 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, + 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, + 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, + 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, + 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, + 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, + 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, + 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, + 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, + 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, + 0x63, 0x69, 0x6e, 0x67, 
0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, + 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, + 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, + 0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], +}; + + +//static FW_TAB_DIR: [u8; 256] = [ +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, +// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, +//]; +//static FW_TAB_STRS: [u8; 244] = [ +// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, +// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, +// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, +// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, +// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, +// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, +// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, +// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, +// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, +// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, +// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, +// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, +// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, +// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, +// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, +// 0x61, 0x70, 0x70, 0x00, +//]; diff --git a/12bitfloat_rust/risspam/src/main_pre_simd_cmp.rs b/12bitfloat_rust/risspam/src/main_pre_simd_cmp.rs new file mode 100644 index 0000000..d096c5f --- /dev/null +++ b/12bitfloat_rust/risspam/src/main_pre_simd_cmp.rs @@ -0,0 +1,891 @@ +#![feature(likely_unlikely)] +#![feature(rust_cold_cc)] + +mod books; + +use crate::books::FULL_BOOK_PATHS; +use core_affinity::CoreId; +use libc::{aio_read, 
aiocb, read}; +use memmap2::Mmap; +use rayon::prelude::*; +use std::cell::OnceCell; +use std::cell::RefCell; +use std::ffi::{OsStr, OsString}; +use std::fs::{File, OpenOptions}; +use std::io::Read; +use std::mem::MaybeUninit; +use std::ops::Deref; +use std::os::linux::raw::stat; +use std::sync::Mutex; +use std::thread::available_parallelism; +use std::time::{Duration, Instant}; +use std::{array, env, fs, hint, mem, process, thread}; +use std::hint::assert_unchecked; +use std::os::unix::fs::{FileExt, OpenOptionsExt}; + +#[inline] +fn is_ascii_whitespace(b: u8) -> bool { + matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') +} + +#[inline] +fn is_ascii_upper(b: u8) -> bool { + matches!(b, b'A'..=b'Z') +} + +#[inline] +fn is_ascii_digit(b: u8) -> bool { + matches!(b, b'0'..=b'9') +} + +#[repr(align(128))] +#[derive(Copy, Clone)] +struct Stats { + pub sentences: u32, + pub words: u32, + pub capitalizeds: u32, + pub numbers: u32, + pub forbiddens: u32, +} + +static TIME_SPENT_READING_FILES: Mutex = Mutex::new(Duration::from_secs(0)); + +const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024; +thread_local! { + static WORK_STATE: RefCell = RefCell::new(WorkState::new()); +} + +pub struct WorkState { + pub work_mem: Box<[u8]>, +// pub io_mem: Box<[u8]>, +// pub curr_read: Option, +// pub had_first_load: bool, +} + +impl WorkState { + pub fn new() -> Self { + Self { + work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), +// io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), +// curr_read: None, +// had_first_load: false, + } + } +} + +#[cold] +#[inline(never)] +extern "rust-cold" fn die() -> ! { + println!("Something went wrong! 
I'm going to die now"); + process::abort() +} + +fn work(file_path: &OsStr, stats: &mut Stats) { + WORK_STATE.with_borrow_mut(|state: &mut WorkState| { +// // Load file +// let start_time = Instant::now(); + +// let Ok(text) = fs::read(file_path) else { +// eprintln!("invalid file!"); +// process::abort(); +// }; + + // NOTE: Reading the file like this is noticeably faster! + let mut file = OpenOptions::new() + .read(true) +// .custom_flags(libc::O_DIRECT) // O_DIRECT is A LOT slower!! + .open(file_path) + .unwrap_or_else(|_| die()); + + let mut read_offset = 0; + loop { +// let rb = file.read_at(&mut state.work_mem[read_offset..], read_offset as u64) + let rb = file.read(&mut state.work_mem[read_offset..]) + .unwrap_or_else(|_| die()); + + if hint::unlikely(rb == 0) { + break; + } + + read_offset += rb; + } + let text = &state.work_mem[..read_offset]; + +// file.read_exact(&mut state.work_mem[..file_len]).unwrap(); + +// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice(); + +// let time_reading = start_time.elapsed(); +// { +// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); +// *guard += time_reading; +// } + + analyze(&text, stats); + }); +} + +fn analyze(text: &[u8], stats: &mut Stats) { +// // NOTE: mmap is quite a bit slower +// // Load file +// let Ok(file) = File::open(file_path) else { +// eprintln!("invalid file!"); +// std::process::abort(); +// }; +// let mmap = unsafe { +// Mmap::map(&file).unwrap() +// }; +// mem::forget(file); +// let text = &*mmap; + +// // Load file +// let start_time = Instant::now(); +// let Ok(text) = fs::read(file_path) else { +// eprintln!("invalid file!"); +// process::abort(); +// }; +// let time_reading = start_time.elapsed(); +// { +// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); +// *guard += time_reading; +// } + + let mut sentences = 0; + let mut words = 0; + let mut capitalizeds = 0; + let mut numbers = 0; + let mut forbiddens = 0; + + let 
mut idx = 0; + 'full_loop: loop { + // Skip whitespace + while is_ascii_whitespace(text[idx]) { + idx += 1; + if hint::unlikely(idx >= text.len()) { + break 'full_loop; + } + } + + // Find end of word + let word_start = idx; + let mut has_non_upper = false; + + 'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) { + idx += 1; + if hint::unlikely(idx >= text.len()) { + break 'find_word_end; + } + + // Per-char logic + if !is_ascii_upper(b) { + has_non_upper = true; + } + if b == b'.' { + sentences += 1; + } + if is_ascii_digit(b) { + numbers += 1; + } +// sentences += (b == b'.') as u32; +// numbers += is_ascii_digit(b) as u32; + } + + let word = &text[word_start..idx]; + +// dbg!(str::from_utf8(word).unwrap()); + + words += 1; + + if !has_non_upper { + capitalizeds += 1; + } + + // Check forbidden + if unsafe { FW_TAB.lookup(word) } { +// if FW_PHF.contains(word) { // phf is a lot slower than my FwTab + forbiddens += 1; + } + } + + /* + for token in text.split(|&b| is_ascii_whitespace(b)) { + if token.is_empty() { + continue; + } + + words += 1; + + // Sentence count, folded into this loop + // instead of another loop (better cache usage) + for &b in token { + if b == b'.' 
{ + sentences += 1; + } + } + + // Check if upper + if token.iter().all(|&b| is_ascii_upper(b)) { + capitalizeds += 1; + } + + // Check digits + for &b in token { + if is_ascii_digit(b) { + numbers += 1; + } + } + + // Check if words +// if FORBIDDEN_WORDS.contains(&token) { +// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { + if unsafe { FW_TAB.lookup(token) } { + forbiddens += 1; + } + } + */ + + /* + // NOTE: This is pretty slow: + let mut idx = 0; + let mut word_start = 0; + let mut is_in_word = false; + let mut has_non_upper = false; + loop { + let b = unsafe { *text.get_unchecked(idx) }; + + let mut process_word = false; + if is_ascii_whitespace(b) { + if is_in_word { + process_word = true; + + // Reset state for next word + is_in_word = false; + has_non_upper = false; + } + } else { + if !is_in_word { + word_start = idx; + is_in_word = true; + } + + has_non_upper |= !is_ascii_upper(b); + } + + // Check digits + if is_ascii_digit(b) { + numbers += 1; + } + // Check sentences + if b == b'.' 
{ + sentences += 1; + } + + let word = &text[word_start..idx]; + + idx += 1; + + if process_word || idx >= text.len() { + words += 1; + if !has_non_upper { + capitalizeds += 1; + } + +// // DEBUG: +// println!("'{}'", str::from_utf8(word).unwrap()); + + if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } { + forbiddens += 1; + } + } + if idx >= text.len() { + break; + } + } + */ + + stats.sentences = sentences; + stats.words = words; + stats.capitalizeds = capitalizeds; + stats.numbers = numbers; + stats.forbiddens = forbiddens; +} + +/* +fn analyze_old(file_path: &OsStr, stats: &mut Stats) { + // Load file + let Ok(text) = fs::read(file_path) else { + eprintln!("invalid file!"); + std::process::abort(); + }; + + let mut sentences = 0; + let mut words = 0; + let mut capitalizeds = 0; + let mut numbers = 0; + let mut forbiddens = 0; + + for token in text.split(|&b| is_ascii_whitespace(b)) { + if token.is_empty() { + continue; + } + + words += 1; + + // Sentence count, folded into this loop + // instead of another loop (better cache usage) + for &b in token { + if b == b'.' 
{ + sentences += 1; + } + } + + // Check if upper + if token.iter().all(|&b| is_ascii_upper(b)) { + capitalizeds += 1; + } + + // Check digits + for &b in token { + if is_ascii_digit(b) { + numbers += 1; + } + } + + // Check if words +// if FORBIDDEN_WORDS.contains(&token) { + if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { + forbiddens += 1; + } + } + + stats.sentences = sentences; + stats.words = words; + stats.capitalizeds = capitalizeds; + stats.numbers = numbers; + stats.forbiddens = forbiddens; +} +*/ + +fn main() { + // Read in files from args + let mut files = Vec::with_capacity(env::args().len()); + let mut do_parallel = false; + + let start_time = Instant::now(); + for arg in env::args_os().skip(1) { + // skip program arg + if arg == "-p" { + do_parallel = true; + } else { + files.push(arg); + } + } + println!("[PROFILE] taking args took {:?}", start_time.elapsed()); + +// env::args_os(). + +// let files = FULL_BOOK_PATHS; + +// // Build table +// let tab = FwTab::build(); +// tab.compile(); + + // Do the work + let mut stats = vec![Stats { + sentences: 0, + words: 0, + capitalizeds: 0, + numbers: 0, + forbiddens: 0, + }; files.len()]; + + let start_time = Instant::now(); + + let num_cores = available_parallelism().unwrap().get(); + let num_threads = num_cores * 1; + +// // DEBUG: +// dbg!(num_threads); +// dbg!(num_cores); + + rayon::ThreadPoolBuilder::new() + .num_threads(num_threads) + .build_global() + .unwrap(); + + files.par_iter() + .enumerate() + .for_each(|(idx, p)| { + let s = unsafe { + &mut *stats.as_ptr() + .offset(idx as isize) + .cast_mut() + }; + +// let mut path = OsString::from("../../"); +// path.push(p); + let path = p; + work(path, s); + }); + +// thread::scope(|scope| { +// let files_per_thread = files.len() / num_threads; +// +// for thread_idx in 0..num_threads { +// let capture_files = &files; +// let capture_stats = &stats; +// thread::Builder::new().spawn_scoped(scope, move || { +// let files = capture_files; 
+// let stats = capture_stats; +// +// // Set thread affinity +// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores })); +// +// // Do work +// let thread_start = thread_idx * files_per_thread; +// for i in 0..files_per_thread { +// let real_idx = thread_start + i; +// let file_path = &files[real_idx]; +// let st = unsafe { +// &mut *stats.as_ptr() +// .offset(real_idx as isize) +// .cast_mut() +// }; +// +// work(&file_path, st); +// } +// }).unwrap(); +// } +// }); + + println!("[PROFILE] processing text took {:?}", start_time.elapsed()); + + // Accumulate stats + let start_time = Instant::now(); + + let mut total_words = 0; + let mut total_capitalizeds = 0; + let mut total_sentences = 0; + let mut total_numbers = 0; + let mut total_forbiddens = 0; + + for stat in &stats { + total_words += stat.words; + total_capitalizeds += stat.capitalizeds; + total_sentences += stat.sentences; + total_numbers += stat.numbers; + total_forbiddens += stat.forbiddens; + } + + let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0; + let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0; + let word_count_per_sentence = total_words as f32 / total_sentences as f32; + + println!(); + println!("Total Words: {total_words}"); + println!("Total Capitalized words: {total_capitalizeds}"); + println!("Total Sentences: {total_sentences}"); + println!("Total Numbers: {total_numbers}"); + println!("Total Forbidden words: {total_forbiddens}"); + println!("Capitalized percentage: {capitalized_percentage:.6}"); + println!("Forbidden percentage: {forbidden_percentage:.6}"); + println!("Word count per sentence: {word_count_per_sentence:.6}"); + println!("Total files read: {}", files.len()); + + println!("[PROFILE] accumulating stats took {:?}", start_time.elapsed()); + + println!("[PROFILE] total file reading took {:?}", &*TIME_SPENT_READING_FILES.lock().unwrap()); + + // Exit process to avoid running drops + 
process::exit(0); +} + +#[repr(C)] +struct FwTab { + // pub dir_and_len_bits: [u32; 256], + pub dir_len_bits: [u16; 256], + pub dir: [u8; 256], + pub strs: [u8; 256], +} + +impl FwTab { + pub fn build() -> Self { + // Sort fws by first char + let mut sorted_fws: Vec> = vec![vec![]; 256]; + + for word in FORBIDDEN_WORDS { + sorted_fws[word[0] as usize].push(&word); + } + for i in 0..256 { + sorted_fws[i].sort() + } + + // // DEBUG: + // println!("{:#?}", sorted_fws[b'@' as usize].iter().map(|s| str::from_utf8(s).unwrap()).collect::>()); + + // Build str tab + let mut fw_dir = [0u8; 256]; +// let mut fw_dir_len_bits = [0u32; 256]; + let mut fw_dir_len_bits = [0u16; 256]; + let mut fw_strs: Vec = vec![]; + + fw_strs.push(b'\0'); // push dummy value so that 0 in the dir means no-entries + + for c in 0..256 { + for fw in FORBIDDEN_WORDS { + if c == fw[0] as usize { + fw_dir_len_bits[c] |= 0x1 << fw.len(); + } + } + + if !sorted_fws[c].is_empty() { + let sublist_start_offset = fw_strs.len().try_into().unwrap(); + fw_dir[c] = sublist_start_offset; + + // DEBUG: + println!("{c} start offset: {}", sublist_start_offset); + println!("{:#?}", sorted_fws[c].iter().map(|s| str::from_utf8(s).unwrap()).collect::>()); + + // Push strings + for fw in &sorted_fws[c] { + fw_strs.push(fw.len().try_into().unwrap()); + for &c in &fw[1..] 
{ + fw_strs.push(c); + } + } + + // Mark end of per-char word sublist + fw_strs.push(b'\0'); + } + } + + // DEBUG: + println!("strs len: {}", fw_strs.len()); + + assert_eq!(fw_dir.len(), 256); + assert!(fw_strs.len() <= 256); + + fw_strs.resize(256, 0); + + let tab = FwTab { + dir: fw_dir, + dir_len_bits: fw_dir_len_bits, +// dir_and_len_bits: array::from_fn(|idx| { +// (fw_dir_len_bits[idx] & 0xff_ff_ff) | ((fw_dir[idx] as u32) << 24) +// }), + strs: fw_strs.try_into().unwrap(), + }; + + // DEBUG: Test some strings + unsafe { + dbg!(tab.lookup(b"cpm")); + dbg!(tab.lookup(b"com")); + dbg!(tab.lookup(b"coma")); + dbg!(tab.lookup(b"co")); + dbg!(tab.lookup(b"cam")); + dbg!(tab.lookup(b"crypto")); + dbg!(tab.lookup(b"@")); + dbg!(tab.lookup(b"")); + dbg!(tab.lookup(b" ")); + dbg!(tab.lookup(b"test")); + dbg!(tab.lookup(b"expers")); + } + + tab + } + + pub fn compile(&self) { + println!("static FW_TAB: FwTab = FwTab {{"); + +// println!("\tdir_and_len_bits: ["); +// for chunk in self.dir_and_len_bits.chunks(16) { +// print!("\t\t"); +// for &b in chunk { +// print!("0x{b:08x}, "); +// } +// println!(); +// } +// println!("\t],"); + + println!("\tdir: ["); + for chunk in self.dir.chunks(16) { + print!("\t\t"); + for &b in chunk { + print!("0x{b:02x}, "); + } + println!(); + } + println!("\t],"); + + println!("\tdir_len_bits: ["); + for chunk in self.dir_len_bits.chunks(16) { + print!("\t\t"); + for &b in chunk { + print!("0x{b:04x}, "); + } + println!(); + } + println!("\t],"); + + println!("\tstrs: ["); + for chunk in self.strs.chunks(16) { + print!("\t\t"); + for &b in chunk { + print!("0x{b:02x}, "); + } + println!(); + } + println!("\t],"); + + println!("}};"); + } + + #[inline] + pub unsafe fn lookup(&self, word: &[u8]) -> bool { +// let &[first_char, ..] 
= word else { +// return false; +// }; + let first_char = unsafe { *word.get_unchecked(0) }; + +// let dir_and_len_bits = unsafe { +// *self.dir_and_len_bits.get_unchecked(first_char as usize) +// }; +// if word.len() < 23 && ((dir_and_len_bits >> word.len()) & 0x1) == 0 { +// return false; +// } + + let len_bits = unsafe { + *self.dir_len_bits.get_unchecked(first_char as usize) + }; + if hint::likely(word.len() < 16 && ((len_bits >> word.len()) & 0x1) == 0) { + return false; + } + +// let mut str_offset = (dir_and_len_bits >> 24) as usize; + let mut str_offset = unsafe { + *self.dir.get_unchecked(first_char as usize) as usize + }; + + // Char doesn't have any strings in the table + if str_offset == 0 { + return false; + } + + // Iterate over strs + loop { +// let fw_len = u16::from_le_bytes([ +// self.strs[str_offset], +// self.strs[str_offset+1] +// ]); + let fw_len: u8 = unsafe { + *self.strs.get_unchecked(str_offset) + }; + + if fw_len == 0 { + // We've reached the end of the word sublist + return false; + } + + // Only compare words if they are the same length + if hint::unlikely(word.len() == fw_len as usize) { + // Compare strs + let mut char_offset = 1usize; + loop { + // Found the word! 
+ if char_offset == word.len() { + return true; + } + + let fw_char = unsafe { *self.strs.get_unchecked(str_offset + char_offset) }; + let word_char = unsafe { *word.get_unchecked(char_offset) }; + + if fw_char > word_char { + // Word can't possible be in the sorted list, return + return false; + } + if fw_char < word_char { + // Try next word + break; + } + + char_offset += 1; + } + } + + // Advance to next word +// let str_len_bytes = 2; + let str_len_bytes = 1; + str_offset += (fw_len as usize - 1) + str_len_bytes; + } + } +} + +const FORBIDDEN_WORDS: [&'static [u8]; 35] = [ + b"recovery", + b"techie", + b"http", + b"https", + b"digital", + b"hack", + b"::", + b"//", + b"com", + b"@", + b"crypto", + b"bitcoin", + b"wallet", + b"hacker", + b"welcome", + b"whatsapp", + b"email", + b"cryptocurrency", + b"stolen", + b"freeze", + b"quick", + b"crucial", + b"tracing", + b"scammers", + b"expers", + b"hire", + b"century", + b"transaction", + b"essential", + b"managing", + b"contact", + b"contacting", + b"understanding", + b"assets", + b"funds", +]; + +static FW_TAB: FwTab = FwTab { + dir: [ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, + 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], + dir_len_bits: [ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000, + 0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + strs: [ + 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, + 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, + 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, + 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, + 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, + 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, + 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, + 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, + 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, + 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, + 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, + 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, + 0x63, 0x69, 0x6e, 0x67, 
0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, + 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, + 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, + 0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], +}; + +static FW_PHF: phf::Set<&'static [u8]> = phf::phf_set! { + b"recovery", + b"techie", + b"http", + b"https", + b"digital", + b"hack", + b"::", + b"//", + b"com", + b"@", + b"crypto", + b"bitcoin", + b"wallet", + b"hacker", + b"welcome", + b"whatsapp", + b"email", + b"cryptocurrency", + b"stolen", + b"freeze", + b"quick", + b"crucial", + b"tracing", + b"scammers", + b"expers", + b"hire", + b"century", + b"transaction", + b"essential", + b"managing", + b"contact", + b"contacting", + b"understanding", + b"assets", + b"funds", +}; + +//static FW_TAB_DIR: [u8; 256] = [ +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, +// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +//]; +//static FW_TAB_STRS: [u8; 244] = [ +// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, +// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, +// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, +// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, +// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, +// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, +// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, +// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, +// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, +// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, +// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, +// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, +// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, +// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, +// 
0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, +// 0x61, 0x70, 0x70, 0x00, +//]; diff --git a/12bitfloat_rust/risspam/src/main_slow_monoio.rs b/12bitfloat_rust/risspam/src/main_slow_monoio.rs new file mode 100644 index 0000000..17dbcd1 --- /dev/null +++ b/12bitfloat_rust/risspam/src/main_slow_monoio.rs @@ -0,0 +1,925 @@ +#![feature(likely_unlikely)] + +mod books; + +use crate::books::FULL_BOOK_PATHS; +use core_affinity::CoreId; +use libc::{aio_read, aiocb}; +use memmap2::Mmap; +use rayon::prelude::*; +use std::cell::OnceCell; +use std::cell::RefCell; +use std::ffi::{OsStr, OsString}; +use std::fs::File; +use std::io::Read; +use std::mem::MaybeUninit; +use std::ops::Deref; +use std::os::linux::raw::stat; +use std::sync::atomic::{AtomicU8, AtomicUsize, Ordering}; +use std::sync::{Arc, Mutex}; +use std::thread::available_parallelism; +use std::time::{Duration, Instant}; +use std::{array, env, fs, hint, mem, process, thread}; +use monoio::IoUringDriver; + +#[inline] +fn is_ascii_whitespace(b: u8) -> bool { + matches!(b, b'\t' | b'\n' | b'\x0C' | b'\r' | b' ') +} + +#[inline] +fn is_ascii_upper(b: u8) -> bool { + matches!(b, b'A'..=b'Z') +} + +#[inline] +fn is_ascii_digit(b: u8) -> bool { + matches!(b, b'0'..=b'9') +} + +#[repr(align(128))] +#[derive(Copy, Clone)] +struct Stats { + pub sentences: u32, + pub words: u32, + pub capitalizeds: u32, + pub numbers: u32, + pub forbiddens: u32, +} + +static TIME_SPENT_READING_FILES: Mutex = Mutex::new(Duration::from_secs(0)); + +const TEMP_MEM_SIZE: usize = 6 * 1024 * 1024; +thread_local! 
{ + static WORK_STATE: RefCell = RefCell::new(WorkState::new()); +} + +pub struct WorkState { + pub work_mem: Box<[u8]>, + pub empty_vec: Box<[u8]>, +// pub io_mem: Box<[u8]>, +// pub curr_read: Option, +// pub had_first_load: bool, +} + +impl WorkState { + pub fn new() -> Self { + Self { + work_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), + empty_vec: vec![].into_boxed_slice(), +// io_mem: vec![0; TEMP_MEM_SIZE].into_boxed_slice(), +// curr_read: None, +// had_first_load: false, + } + } +} + +fn work(file_path: &OsStr, stats: &mut Stats) { + WORK_STATE.with_borrow_mut(|state: &mut WorkState| { +// // Load file +// let start_time = Instant::now(); + +// let Ok(text) = fs::read(file_path) else { +// eprintln!("invalid file!"); +// process::abort(); +// }; + + let mut file = File::open(file_path).unwrap(); + let file_len = file.metadata().unwrap().len() as usize; + file.read_exact(&mut state.work_mem[..file_len]).unwrap(); + let text = &state.work_mem[..file_len]; + + unsafe { + let mut cb = mem::zeroed(); + + aio_read(&raw mut cb); + } + +// let text = include_bytes!("../../../books/Advanced Techniques in Web Intelligence – Part II.txt").as_slice(); + +// let time_reading = start_time.elapsed(); +// { +// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); +// *guard += time_reading; +// } + + analyze(&text, stats); + }); +} + +fn analyze(text: &[u8], stats: &mut Stats) { +// // NOTE: mmap is quite a bit slower +// // Load file +// let Ok(file) = File::open(file_path) else { +// eprintln!("invalid file!"); +// std::process::abort(); +// }; +// let mmap = unsafe { +// Mmap::map(&file).unwrap() +// }; +// mem::forget(file); +// let text = &*mmap; + +// // Load file +// let start_time = Instant::now(); +// let Ok(text) = fs::read(file_path) else { +// eprintln!("invalid file!"); +// process::abort(); +// }; +// let time_reading = start_time.elapsed(); +// { +// let mut guard = TIME_SPENT_READING_FILES.lock().unwrap(); +// *guard += time_reading; +// } + + let 
mut sentences = 0; + let mut words = 0; + let mut capitalizeds = 0; + let mut numbers = 0; + let mut forbiddens = 0; + + let mut idx = 0; + 'full_loop: loop { + // TODO: Necessary for now + if idx >= text.len() { + break 'full_loop; + } + + // Skip whitespace + while is_ascii_whitespace(text[idx]) { + idx += 1; + if idx >= text.len() { + break 'full_loop; + } + } + + // Find end of word + let word_start = idx; + let mut has_non_upper = false; + + 'find_word_end: while let b = text[idx] && !is_ascii_whitespace(b) { + idx += 1; + if idx >= text.len() { + break 'find_word_end; + } + + // Per-char logic + if b == b'.' { + sentences += 1; + } + if !is_ascii_upper(b) { + has_non_upper = true; + } + if is_ascii_digit(b) { + numbers += 1; + } + } + + let word = &text[word_start..idx]; + +// dbg!(str::from_utf8(word).unwrap()); + + words += 1; + + if !has_non_upper { + capitalizeds += 1; + } + + // Check forbidden + if unsafe { FW_TAB.lookup(word) } { + forbiddens += 1; + } + } + + /* + for token in text.split(|&b| is_ascii_whitespace(b)) { + if token.is_empty() { + continue; + } + + words += 1; + + // Sentence count, folded into this loop + // instead of another loop (better cache usage) + for &b in token { + if b == b'.' 
{ + sentences += 1; + } + } + + // Check if upper + if token.iter().all(|&b| is_ascii_upper(b)) { + capitalizeds += 1; + } + + // Check digits + for &b in token { + if is_ascii_digit(b) { + numbers += 1; + } + } + + // Check if words +// if FORBIDDEN_WORDS.contains(&token) { +// if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { + if unsafe { FW_TAB.lookup(token) } { + forbiddens += 1; + } + } + */ + + /* + // NOTE: This is pretty slow: + let mut idx = 0; + let mut word_start = 0; + let mut is_in_word = false; + let mut has_non_upper = false; + loop { + let b = unsafe { *text.get_unchecked(idx) }; + + let mut process_word = false; + if is_ascii_whitespace(b) { + if is_in_word { + process_word = true; + + // Reset state for next word + is_in_word = false; + has_non_upper = false; + } + } else { + if !is_in_word { + word_start = idx; + is_in_word = true; + } + + has_non_upper |= !is_ascii_upper(b); + } + + // Check digits + if is_ascii_digit(b) { + numbers += 1; + } + // Check sentences + if b == b'.' 
{ + sentences += 1; + } + + let word = &text[word_start..idx]; + + idx += 1; + + if process_word || idx >= text.len() { + words += 1; + if !has_non_upper { + capitalizeds += 1; + } + +// // DEBUG: +// println!("'{}'", str::from_utf8(word).unwrap()); + + if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, word) } { + forbiddens += 1; + } + } + if idx >= text.len() { + break; + } + } + */ + + stats.sentences = sentences; + stats.words = words; + stats.capitalizeds = capitalizeds; + stats.numbers = numbers; + stats.forbiddens = forbiddens; +} + +/* +fn analyze_old(file_path: &OsStr, stats: &mut Stats) { + // Load file + let Ok(text) = fs::read(file_path) else { + eprintln!("invalid file!"); + std::process::abort(); + }; + + let mut sentences = 0; + let mut words = 0; + let mut capitalizeds = 0; + let mut numbers = 0; + let mut forbiddens = 0; + + for token in text.split(|&b| is_ascii_whitespace(b)) { + if token.is_empty() { + continue; + } + + words += 1; + + // Sentence count, folded into this loop + // instead of another loop (better cache usage) + for &b in token { + if b == b'.' 
{ + sentences += 1; + } + } + + // Check if upper + if token.iter().all(|&b| is_ascii_upper(b)) { + capitalizeds += 1; + } + + // Check digits + for &b in token { + if is_ascii_digit(b) { + numbers += 1; + } + } + + // Check if words +// if FORBIDDEN_WORDS.contains(&token) { + if unsafe { FwTab::lookup_raw(&FW_TAB_DIR, &FW_TAB_STRS, token) } { + forbiddens += 1; + } + } + + stats.sentences = sentences; + stats.words = words; + stats.capitalizeds = capitalizeds; + stats.numbers = numbers; + stats.forbiddens = forbiddens; +} +*/ + +fn main() { + // Read in files from args + let mut files = Vec::with_capacity(env::args().len()); + let mut do_parallel = false; + + let start_time = Instant::now(); + for arg in env::args_os().skip(1) { + // skip program arg + if arg == "-p" { + do_parallel = true; + } else { + files.push(arg); + } + } + println!("[PROFILE] taking args took {:?}", start_time.elapsed()); + +// env::args_os(). + +// let files = FULL_BOOK_PATHS; + +// // Build table +// let tab = FwTab::build(); +// tab.compile(); + + // Do the work + let mut stats = vec![Stats { + sentences: 0, + words: 0, + capitalizeds: 0, + numbers: 0, + forbiddens: 0, + }; files.len()]; + + let start_time = Instant::now(); + + let num_cores = available_parallelism().unwrap().get(); + let num_threads = num_cores * 1; + + // DEBUG: + dbg!(num_threads); + dbg!(num_cores); + + let next_file_idx = &*Box::leak(Box::new(AtomicUsize::new(0))); + + thread::scope(|scope| { + for thread_idx in 0..num_threads { + // Set thread affinity + assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores })); + + let cap_next_file_idx = &next_file_idx; + let cap_stats_ptr = stats.as_ptr() as usize; + let cap_files = &files; + thread::Builder::new().spawn_scoped(scope, move || { + let files = cap_files; + +// let exec = glommio::LocalExecutorBuilder::new(Placement::Unbound).make().unwrap(); +// exec.run(async { +// println!("Running in glommio thread {core_idx}"); +// }); + + let mut rt = 
monoio::RuntimeBuilder::::new() + .build() + .unwrap(); + + let mut work_mem = vec![0; TEMP_MEM_SIZE].into_boxed_slice(); + let files_per_thread = files.len() / num_threads; + + rt.block_on(async { +// // Claim next file id +// loop { +// let work_idx = cap_next_file_idx.fetch_add(1, Ordering::Relaxed); +// if work_idx >= files.len() { +// return; +// } + + // Do work + let thread_start = thread_idx * files_per_thread; + for i in 0..files_per_thread { + let work_idx = thread_start + i; + + let path = &files[work_idx]; + let stat = unsafe { + &mut *(cap_stats_ptr as *mut Stats) + .offset(work_idx as isize) + }; + + let file = monoio::fs::File::open(path) + .await + .unwrap(); + + struct CappedReadBuf(pub Box<[u8]>, usize); + unsafe impl monoio::buf::IoBufMut for CappedReadBuf { + fn write_ptr(&mut self) -> *mut u8 { + monoio::buf::IoBufMut::write_ptr(&mut self.0) + } + + fn bytes_total(&mut self) -> usize { + self.1 + } + + unsafe fn set_init(&mut self, pos: usize) { + monoio::buf::IoBufMut::set_init(&mut self.0, pos) + } + } + + let meta = file.metadata().await.unwrap(); + let io_mem = mem::take(&mut work_mem); + let (res, buf) = file.read_exact_at(CappedReadBuf(io_mem, meta.len() as usize), 0).await; + res.unwrap(); + work_mem = buf.0; + + analyze(&work_mem, stat); + } + }) + +// exec.run(async { +// // Claim next file id +// let work_idx = cap_next_file_idx.fetch_add(1, Ordering::Relaxed); +// +// let path = &files[work_idx]; +// let stat = unsafe { +// &mut *(stats_ptr as *mut Stats) +// .offset(work_idx as isize) +// }; +// +// work(path, stat); +// }); + }).unwrap(); + } + }); + +// rayon::ThreadPoolBuilder::new() +// .num_threads(num_threads) +// .build_global() +// .unwrap(); +// +// files.par_iter() +// .enumerate() +// .for_each(|(idx, p)| { +// let s = unsafe { +// &mut *stats.as_ptr() +// .offset(idx as isize) +// .cast_mut() +// }; +// +//// let mut path = OsString::from("../../"); +//// path.push(p); +// let path = p; +// work(path, s); +// }); + +// 
thread::scope(|scope| { +// let files_per_thread = files.len() / num_threads; +// +// for thread_idx in 0..num_threads { +// let capture_files = &files; +// let capture_stats = &stats; +// thread::Builder::new().spawn_scoped(scope, move || { +// let files = capture_files; +// let stats = capture_stats; +// +// // Set thread affinity +// assert!(core_affinity::set_for_current(CoreId { id: thread_idx % num_cores })); +// +// // Do work +// let thread_start = thread_idx * files_per_thread; +// for i in 0..files_per_thread { +// let real_idx = thread_start + i; +// let file_path = &files[real_idx]; +// let st = unsafe { +// &mut *stats.as_ptr() +// .offset(real_idx as isize) +// .cast_mut() +// }; +// +// work(&file_path, st); +// } +// }).unwrap(); +// } +// }); + + println!("[PROFILE] processing text took {:?}", start_time.elapsed()); + + // Accumulate stats + let start_time = Instant::now(); + + let mut total_words = 0; + let mut total_capitalizeds = 0; + let mut total_sentences = 0; + let mut total_numbers = 0; + let mut total_forbiddens = 0; + + for stat in &stats { + total_words += stat.words; + total_capitalizeds += stat.capitalizeds; + total_sentences += stat.sentences; + total_numbers += stat.numbers; + total_forbiddens += stat.forbiddens; + } + + let capitalized_percentage = (total_capitalizeds as f32 / total_words as f32) * 100.0; + let forbidden_percentage = (total_forbiddens as f32 / total_words as f32) * 100.0; + let word_count_per_sentence = total_words as f32 / total_sentences as f32; + + println!(); + println!("Total Words: {total_words}"); + println!("Total Capitalized words: {total_capitalizeds}"); + println!("Total Sentences: {total_sentences}"); + println!("Total Numbers: {total_numbers}"); + println!("Total Forbidden words: {total_forbiddens}"); + println!("Capitalized percentage: {capitalized_percentage:.6}"); + println!("Forbidden percentage: {forbidden_percentage:.6}"); + println!("Word count per sentence: {word_count_per_sentence:.6}"); + 
/// Compact lookup table for the forbidden-word list, bucketed by first byte.
///
/// A lookup takes the first byte of the candidate word, rejects it early when
/// no forbidden word with that first byte has the same length, and otherwise
/// walks the sorted sublist of words sharing that first byte.
#[repr(C)]
struct FwTab {
    /// Per first byte: bit `n` is set when some forbidden word starting with
    /// that byte is `n` bytes long. Only lengths below 16 are representable.
    pub dir_len_bits: [u16; 256],
    /// Per first byte: offset into `strs` of that byte's sublist, or `0` when
    /// no forbidden word starts with it.
    pub dir: [u8; 256],
    /// Concatenated sublists. Each entry is the full word length followed by
    /// the word without its first byte; each sublist ends with a `0` byte.
    /// Offset 0 is a dummy byte so that `0` in `dir` can mean "no entries".
    pub strs: [u8; 256],
}

impl FwTab {
    /// Number of word lengths the `u16` length filter can represent.
    const LEN_FILTER_BITS: usize = u16::BITS as usize;

    /// Builds the table from `FORBIDDEN_WORDS`.
    ///
    /// This only builds the table; it prints nothing, so the source emitted by
    /// [`FwTab::compile`] on stdout is not mixed with debug output.
    ///
    /// # Panics
    ///
    /// Panics if a forbidden word is empty or longer than 255 bytes, or if the
    /// encoded sublists do not fit in the 256-byte `strs` array.
    pub fn build() -> Self {
        // Bucket the words by first byte and record which lengths occur.
        let mut by_first: Vec<Vec<&'static [u8]>> = vec![Vec::new(); 256];
        let mut dir_len_bits = [0u16; 256];
        for word in FORBIDDEN_WORDS.iter().copied() {
            let first = *word.first().expect("forbidden words must be non-empty") as usize;
            by_first[first].push(word);
            // Lengths that do not fit in the filter are left unmarked;
            // `lookup` skips the filter for such lengths.
            if word.len() < Self::LEN_FILTER_BITS {
                dir_len_bits[first] |= 1 << word.len();
            }
        }

        let mut dir = [0u8; 256];
        // Dummy byte at offset 0 so that 0 in `dir` means "no entries".
        let mut strs: Vec<u8> = vec![0];

        for (first, bucket) in by_first.iter_mut().enumerate() {
            if bucket.is_empty() {
                continue;
            }
            // `lookup` relies on each sublist being sorted for its early exit.
            bucket.sort_unstable();

            dir[first] = u8::try_from(strs.len()).expect("forbidden-word table exceeds 256 bytes");
            for word in bucket.iter() {
                strs.push(u8::try_from(word.len()).expect("forbidden word longer than 255 bytes"));
                // The first byte is implied by the bucket and not stored.
                strs.extend_from_slice(&word[1..]);
            }
            // Mark end of per-byte word sublist.
            strs.push(0);
        }

        assert!(strs.len() <= 256, "forbidden-word table exceeds 256 bytes");
        strs.resize(256, 0);

        FwTab {
            dir,
            dir_len_bits,
            strs: <[u8; 256]>::try_from(strs).expect("table was resized to exactly 256 bytes"),
        }
    }

    /// Prints the table to stdout as Rust source for a `static FW_TAB: FwTab`.
    pub fn compile(&self) {
        println!("static FW_TAB: FwTab = FwTab {{");
        Self::print_rows("dir", &self.dir, 2);
        Self::print_rows("dir_len_bits", &self.dir_len_bits, 4);
        Self::print_rows("strs", &self.strs, 2);
        println!("}};");
    }

    /// Prints one array field as rows of 16 zero-padded hex literals.
    fn print_rows<T: std::fmt::LowerHex>(field: &str, values: &[T], hex_digits: usize) {
        println!("\t{field}: [");
        for row in values.chunks(16) {
            print!("\t\t");
            for value in row {
                print!("0x{value:0hex_digits$x}, ");
            }
            println!();
        }
        println!("\t],");
    }

    /// Returns `true` when `word` is exactly one of the words in the table.
    ///
    /// An empty `word` is never found.
    ///
    /// # Safety
    ///
    /// `self` must be well-formed, because `strs` is read without bounds
    /// checks: every non-zero `dir` offset must point at a sublist inside
    /// `strs`, each entry must be `len` followed by `len - 1` bytes, and each
    /// sublist must end with a `0` byte inside `strs`. Tables produced by
    /// [`FwTab::build`] satisfy this.
    #[inline]
    pub unsafe fn lookup(&self, word: &[u8]) -> bool {
        let Some(&first) = word.first() else {
            return false;
        };
        // A `u8` index is always in bounds for the 256-entry directories.
        let bucket = first as usize;

        // Cheap rejection: no forbidden word with this first byte and length.
        // Lengths beyond the filter width cannot be tested and fall through.
        let len_bits = self.dir_len_bits[bucket];
        if word.len() < Self::LEN_FILTER_BITS && ((len_bits >> word.len()) & 0x1) == 0 {
            return false;
        }

        let mut entry = self.dir[bucket] as usize;
        // Byte doesn't have any strings in the table.
        if entry == 0 {
            return false;
        }

        loop {
            // SAFETY: `entry` points at an entry header or a sublist
            // terminator inside `strs`, per the caller's contract.
            let fw_len = unsafe { *self.strs.get_unchecked(entry) } as usize;
            if fw_len == 0 {
                // We've reached the end of the word sublist.
                return false;
            }

            // Only compare words of the same length.
            if fw_len == word.len() {
                // Position 0 is the shared first byte, so start at 1.
                let mut pos = 1usize;
                loop {
                    if pos == fw_len {
                        // Every byte matched.
                        return true;
                    }

                    // SAFETY: `pos < fw_len == word.len()`, and the entry's
                    // bytes at `entry + 1..entry + fw_len` lie inside `strs`
                    // per the caller's contract.
                    let (fw_char, word_char) = unsafe {
                        (
                            *self.strs.get_unchecked(entry + pos),
                            *word.get_unchecked(pos),
                        )
                    };

                    if fw_char > word_char {
                        // The sublist is sorted, so every later entry of this
                        // length is greater as well.
                        return false;
                    }
                    if fw_char < word_char {
                        // Try next word.
                        break;
                    }

                    pos += 1;
                }
            }

            // One length byte plus `fw_len - 1` stored bytes.
            entry += fw_len;
        }
    }
}

/// Words counted as "forbidden" by the analyzer; the source list for [`FwTab::build`].
const FORBIDDEN_WORDS: [&'static [u8]; 35] = [
    b"recovery",
    b"techie",
    b"http",
    b"https",
    b"digital",
    b"hack",
    b"::",
    b"//",
    b"com",
    b"@",
    b"crypto",
    b"bitcoin",
    b"wallet",
    b"hacker",
    b"welcome",
    b"whatsapp",
    b"email",
    b"cryptocurrency",
    b"stolen",
    b"freeze",
    b"quick",
    b"crucial",
    b"tracing",
    b"scammers",
    b"expers",
    b"hire",
    b"century",
    b"transaction",
    b"essential",
    b"managing",
    b"contact",
    b"contacting",
    b"understanding",
    b"assets",
    b"funds",
];
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], + dir_len_bits: [ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0004, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0002, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0040, 0x0080, 0x44c8, 0x0080, 0x0260, 0x0060, 0x0000, 0x0070, 0x0000, 0x0000, 0x0000, 0x0000, 0x0100, 0x0000, 0x0000, + 0x0000, 0x0020, 0x0100, 0x0140, 0x08c0, 0x2000, 0x0000, 0x01c0, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 
0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + ], + strs: [ + 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, + 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, + 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, + 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, + 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, + 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, + 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, + 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, + 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, + 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, + 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, + 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, + 0x63, 0x69, 0x6e, 0x67, 
0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, + 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, + 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, + 0x61, 0x70, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + ], +}; + +//static FW_TAB_DIR: [u8; 256] = [ +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x07, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x09, 0x10, 0x18, 0x4f, 0x57, 0x6c, 0x00, 0x78, 0x00, 0x00, 0x00, 0x00, 0x90, 0x00, 0x00, +// 0x00, 0x99, 0x9f, 0xa8, 0xb7, 0xd0, 0x00, 0xde, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, +// 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 
0x00, 0x00, 0x00, +//]; +//static FW_TAB_STRS: [u8; 244] = [ +// 0x00, 0x02, 0x2f, 0x00, 0x02, 0x3a, 0x00, 0x01, 0x00, 0x06, 0x73, 0x73, 0x65, 0x74, 0x73, 0x00, +// 0x07, 0x69, 0x74, 0x63, 0x6f, 0x69, 0x6e, 0x00, 0x07, 0x65, 0x6e, 0x74, 0x75, 0x72, 0x79, 0x03, +// 0x6f, 0x6d, 0x07, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, 0x0a, 0x6f, 0x6e, 0x74, 0x61, 0x63, 0x74, +// 0x69, 0x6e, 0x67, 0x07, 0x72, 0x75, 0x63, 0x69, 0x61, 0x6c, 0x06, 0x72, 0x79, 0x70, 0x74, 0x6f, +// 0x0e, 0x72, 0x79, 0x70, 0x74, 0x6f, 0x63, 0x75, 0x72, 0x72, 0x65, 0x6e, 0x63, 0x79, 0x00, 0x07, +// 0x69, 0x67, 0x69, 0x74, 0x61, 0x6c, 0x00, 0x05, 0x6d, 0x61, 0x69, 0x6c, 0x09, 0x73, 0x73, 0x65, +// 0x6e, 0x74, 0x69, 0x61, 0x6c, 0x06, 0x78, 0x70, 0x65, 0x72, 0x73, 0x00, 0x06, 0x72, 0x65, 0x65, +// 0x7a, 0x65, 0x05, 0x75, 0x6e, 0x64, 0x73, 0x00, 0x04, 0x61, 0x63, 0x6b, 0x06, 0x61, 0x63, 0x6b, +// 0x65, 0x72, 0x04, 0x69, 0x72, 0x65, 0x04, 0x74, 0x74, 0x70, 0x05, 0x74, 0x74, 0x70, 0x73, 0x00, +// 0x08, 0x61, 0x6e, 0x61, 0x67, 0x69, 0x6e, 0x67, 0x00, 0x05, 0x75, 0x69, 0x63, 0x6b, 0x00, 0x08, +// 0x65, 0x63, 0x6f, 0x76, 0x65, 0x72, 0x79, 0x00, 0x08, 0x63, 0x61, 0x6d, 0x6d, 0x65, 0x72, 0x73, +// 0x06, 0x74, 0x6f, 0x6c, 0x65, 0x6e, 0x00, 0x06, 0x65, 0x63, 0x68, 0x69, 0x65, 0x07, 0x72, 0x61, +// 0x63, 0x69, 0x6e, 0x67, 0x0b, 0x72, 0x61, 0x6e, 0x73, 0x61, 0x63, 0x74, 0x69, 0x6f, 0x6e, 0x00, +// 0x0d, 0x6e, 0x64, 0x65, 0x72, 0x73, 0x74, 0x61, 0x6e, 0x64, 0x69, 0x6e, 0x67, 0x00, 0x06, 0x61, +// 0x6c, 0x6c, 0x65, 0x74, 0x07, 0x65, 0x6c, 0x63, 0x6f, 0x6d, 0x65, 0x08, 0x68, 0x61, 0x74, 0x73, +// 0x61, 0x70, 0x70, 0x00, +//]; diff --git a/Makefile b/Makefile index c69eb12..ea7c836 100644 --- a/Makefile +++ b/Makefile @@ -32,6 +32,8 @@ build_all: build build_py build_cpp build_borded_cpp build_risspam build_jest run: run_spam wl run_not_spam run_risspam: run_spam_risspam run_not_spam_risspam +bench_rust: build_risspam benchmark_only + format: clang-format *.c *.h -i