//Diagram Imports for Algorithms
import RF_Diagram from '../../Images/RF_Diagram.jpg'
import SVC_Diagram from '../../Images/SVC_Diagram.jpg'
import GB_Diagram from '../../Images/GB_Diagram.jpg'

//Content For model builder
/**
 * Static explanatory content for each algorithm, keyed by algorithm identifier
 * (`support_vector_classifier`, `random_forest_classifier`,
 * `gradient_boosting_classifier`).
 *
 * Each value is an ordered array of entries shaped `{ type, content }`:
 *  - type 'heads'    : content is a section heading string
 *  - type 'subheads' : content is a sub-heading string
 *  - type 'text'     : content is a paragraph string; bullet lists are embedded
 *                      in the string as "\n - item" line breaks
 *  - type 'image'    : content is one of the diagram assets imported at the top
 *                      of this file
 *
 * Entry order is the intended reading order.
 */
const static_algorithms = {
    //Data is made like this for content mapping.
    support_vector_classifier: [
        {
            type: 'heads',
            content: 'Support Vector Classifier'
        },
        {
            type: 'subheads',
            content: 'What is it?'
        },
        {
            type: 'text',
            content: 'Support Vector Machine (SVM) is a supervised machine learning algorithm that can be used for both classification or regression challenges. However,  it is mostly used in classification problems. The objective of the support vector machine algorithm is to find a hyperplane in an N-dimensional space(N — the number of features) that distinctly classifies the data points. '
        },
        {
            type: 'subheads',
            content: 'How Does it Work?'
        },
        {
            type: 'text',
            content: 'The Support vector machine classifier works by finding the hyperplane that maximises the margin between the two classes. Support Vectors are simply the coordinates of individual observation. The SVM classifier is a frontier that best segregates the two classes (hyper-plane/ line). From there, after getting the hyperplane, you can then feed some features to your classifier to see what the "predicted" class is.'
        },
        {
            type: 'image',
            content: SVC_Diagram
        },
        {
            type: 'subheads',
            content: 'Why use it?'
        },
        {
            type: 'text',
            content: " - It works really well with a clear margin of separation \n - It is effective in high dimensional spaces.\n - It is effective in cases where the number of dimensions is greater than the number of samples.\n - It uses a subset of training points in the decision function (called support vectors), so it is also memory efficient.\n - Different kernel functions can be specified for the decision function. Common kernels can be used, but it's also possible to specify custom kernels.\nSince SVMs can use any number of kernels, it's important to know about a few of them."
        },
        {
            type: 'heads',
            content: 'Kernel functions:'
        },
        {
            type: 'subheads',
            content: 'Linear'
        },
        {
            type: 'text',
            content: 'These are commonly recommended for text classification because most of these types of classification problems are linearly separable. The linear kernel works really well when there are a lot of features, and text classification problems have a lot of features. Linear kernel functions are faster than most of the others and you have fewer parameters to optimise.'
        },
        {
            type: 'subheads',
            content: 'Polynomial'
        },
        {
            type: 'text',
            content: "The polynomial kernel isn't used in practice very often because it isn't as computationally efficient as other kernels and its predictions aren't as accurate."
        },
        {
            type: 'subheads',
            content: 'Gaussian Radial Basis Function (RBF)'
        },
        {
            type: 'text',
            content: 'One of the most powerful and commonly used kernels in SVMs. Usually the choice for non-linear data.'
        },
        {
            type: 'subheads',
            content: 'Sigmoid'
        },
        {
            type: 'text',
            content: 'More useful in neural networks than in support vector machines, but there are occasional specific use cases.'
        }
    ],

    random_forest_classifier: [
        {
            type: 'heads',
            content: 'Random Forest Classifier'
        },
        {
            type: 'subheads',
            content: 'What is it?'
        },
        {
            type: 'text',
            content: 'A Random Forest Algorithm is a supervised machine learning algorithm which is extremely popular and is used for Classification and Regression problems in Machine Learning. We know that a forest comprises numerous trees, and the more trees there are, the more it will be robust. Similarly, the greater the number of trees in a Random Forest Algorithm, the higher its accuracy and problem-solving ability.'
        },
        {
            type: 'subheads',
            content: 'How does it work?'
        },
        {
            type: 'text',
            content: 'Random Forest is a classifier that contains several decision trees on various subsets of the given dataset and takes the average to improve the predictive accuracy of that dataset. It is based on the concept of ensemble learning which is a process of combining multiple classifiers to solve a complex problem and improve the performance of the model.'
        },
        {
            type: 'image',
            content: RF_Diagram
        },
        {
            type: 'subheads',
            content: 'Why Use a Random Forest Algorithm?'
        },
        {
            type: 'text',
            content: 'One of the main advantages of using Random Forest Algorithm is that it reduces the risk of overfitting and the required training time. Additionally, it offers a high level of accuracy. Random Forest algorithm runs efficiently in large databases and produces highly accurate predictions by estimating missing data.\n - It takes less training time as compared to other algorithms.\n - It predicts output with high accuracy, even for the large dataset it runs efficiently.\n - It can also maintain accuracy when a large proportion of data is missing.'
        },
        {
            type: 'subheads',
            content: 'Important Hyperparameters'
        },
        {
            type: 'text',
            content: 'Hyperparameters are used in random forests to either enhance the performance and predictive power of models or to make the model faster. The following hyperparameters are used to enhance the predictive power:\n - n_estimators: Number of trees built by the algorithm before averaging the products.\n - max_features: Maximum number of features random forest uses before considering splitting a node.\n - min_samples_leaf: Determines the minimum number of leaves required to split an internal node.'
        },
        {
            type: 'text',
            content: 'The following hyperparameters are used to increase the speed of the model:\n - n_jobs: Conveys to the engine how many processors are allowed to use. If the value is 1, it can use only one processor, but if the value is -1, there is no limit.\n - random_state: Controls randomness of the sample. The model will always produce the same results if it has a definite value of random state and if it has been given the same hyperparameters and the same training data.\n - oob_score: OOB (Out Of the Bag) is a random forest cross-validation method. In this, one-third of the sample is not used to train the data but to evaluate its performance. '
        }
    ],

    gradient_boosting_classifier: [
        {
            type: 'heads',
            content: 'Gradient Boosting Classifier'
        },
        {
            type: 'subheads',
            content: 'What is it?'
        },
        {
            type: 'text',
            content: 'Gradient boosting algorithm is one of the most powerful algorithms in the field of machine learning. Errors play a major role in any machine learning algorithm. There are mainly two types of error, bias error and variance error. Gradient boost algorithm helps us minimise bias error of the model. It can be used for predicting not only continuous target variable (as a Regressor) but also categorical target variable (as a Classifier). When it is used as a regressor, the cost function is Mean Square Error (MSE) and when it is used as a classifier then the cost function is Log loss.'
        },
        {
            type: 'subheads',
            content: 'How does it work?'
        },
        {
            type: 'text',
            content: 'The main idea behind this algorithm is to build models sequentially and these subsequent models try to reduce the errors of the previous model. But how do we do that? How do we reduce the error? This is done by building a new model on the errors or residuals of the previous model.'
        },
        {
            type: 'image',
            content: GB_Diagram
        },
        {
            type: 'text',
            content: 'The gradient boosting algorithm contains three elements. The loss function changes according to the problem at hand, weak learners who are used for making predictions, and the additive model where trees are added with a gradient descent procedure. The method predicts the best possible model by combining the next model with the previous ones, thus minimising the error.'
        },
        {
            type: 'heads',
            content: 'Forms of Boosting?'
        },
        {
            type: 'text',
            content: 'Boosting can take several forms, including:'
        },
        {
            type: 'subheads',
            content: '1. Adaptive Boosting (Adaboost)'
        },
        {
            type: 'text',
            content: 'Adaboost aims at combining several weak learners to form a single strong learner. Adaboost concentrates on weak learners, which are often decision trees with only one split and are commonly referred to as decision stumps. The first decision stump in Adaboost contains observations that are weighted equally.'
        },
        {
            type: 'text',
            content: 'Previous errors are corrected, and any observations that were classified incorrectly are assigned more weight than other observations that had no error in classification. Algorithms from Adaboost are popularly used in regression and classification procedures. An error noticed in previous models is adjusted with weighting until an accurate predictor is made.'
        },
        {
            type: 'subheads',
            content: '2. Gradient Boosting'
        },
        {
            type: 'text',
            content: 'Gradient boosting, just like any other ensemble machine learning procedure, sequentially adds predictors to the ensemble and follows the sequence in correcting preceding predictors to arrive at an accurate predictor at the end of the procedure. Adaboost corrects its previous errors by tuning the weights for every incorrect observation in every iteration. Still, gradient boosting aims at fitting a new predictor in the residual errors committed by the preceding predictor.'
        },
        {
            type: 'text',
            content: "Gradient boosting utilises the gradient descent to pinpoint the challenges in the learners' predictions used previously. The previous error is highlighted, and by combining one weak learner to the next learner, the error is reduced significantly over time."
        },
        {
            type: 'subheads',
            content: '3. XGBoost (Extreme Gradient Boosting)'
        },
        {
            type: 'text',
            content: 'XGBoost is reliant on the performance of a model and computational speed. It provides various benefits, such as parallelization, distributed computing, cache optimization, and out-of-core computing.'
        },
        {
            type: 'text',
            content: "XGBoost provides parallelization in tree building through the use of the CPU cores during training. It also distributes computing when it is training large models using machine clusters. Out-of-core computing is utilised for larger data sets that can’t fit in the conventional memory size. Cache optimization is also utilised for algorithms and data structures to optimise the use of available hardware"
        },
        {
            type: 'subheads',
            content: 'Why use it?'
        },
        {
            type: 'text',
            content: 'Gradient boosting is commonly used to reduce the chance of error when processing large and complex data. In addition, it is used to create the best possible predictions in regression and classification procedures.'
        },
    ]
}


/**
 * Preset node groups.
 *
 * Each entry pairs a group label (`nodegroup`) with the ordered list of node
 * names (`nodes`) that belong to that group.
 */
const presetNodes = [
    {
        nodegroup: 'Chemical Identification',
        nodes: [
            'Plastic Classification',
            'Drug Classification',
        ],
    },
];


//Named exports: the per-algorithm content map and the preset node groups
export {static_algorithms, presetNodes}
